From 15434a8a60cd1038d115ecc1c4e43d8c090ba0c5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?= <14986783+ELC@users.noreply.github.com> Date: Thu, 19 Jun 2025 20:37:24 -0300 Subject: [PATCH 1/7] Scraped pydata-global-2024 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes #xxx Event config: ~~~yaml repo_dir: W:\Repositories\pyvideo-data # Copy the event template here and adapt to the event parameters # Only repo_dir: and events: are loaded # ============================================================================= events: # - title: PyData Virginia 2025 # dir: pydata-virginia-2025 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qLS7Mk-jI9jhb4t5UY6yDW # related_urls: # - label: Conference Website # url: https://pydata.org/virginia2025 # language: eng # dates: # begin: 2025-04-18 # end: 2025-04-19 # default: 2025-04-18 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Global 2020 # dir: pydata-global-2020 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0r0eC9BnITmYJ786p9Y1Q8D # related_urls: # - label: Conference Website # url: https://pydataglobal.github.io/ # language: eng # dates: # begin: 2020-11-11 # end: 2020-11-15 # default: 2020-11-11 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: - title: PyData Global 2024 dir: pydata-global-2024 youtube_list: - 
https://www.youtube.com/playlist?list=PLGVZCDnMOq0otKlHvES9iBFtVQ71yZhed related_urls: - label: Conference Website url: https://pydata.org/global2024 language: eng dates: begin: 2024-12-03 end: 2024-12-05 default: 2024-12-03 minimal_download: false issue: xxx overwrite: # all: true # takes precedence over add_new_files and existing_files_fields add_new_files: true existing_files_fields: - duration - thumbnail_url - videos - description - language - recorded - related_urls - speakers - tags - title tags: # - title: SciPy 2024 # dir: scipy-2024 # youtube_list: # - https://www.youtube.com/playlist?list=PL1PbeFStIOoO7rDLs431H-rn0h24Wr80S # related_urls: # - label: Conference Website # url: https://www.scipy2024.scipy.org/ # language: eng # dates: # begin: 2024-07-08 # end: 2024-07-14 # default: 2024-07-08 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData New York City 2024 # dir: pydata-new-york-city-2024 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0ohEIZ-_wM2W_xqSVjyA3dC # related_urls: # - label: Conference Website # url: https://pydata.org/nyc2024 # language: eng # dates: # begin: 2024-11-06 # end: 2024-11-08 # default: 2024-11-06 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Tel Aviv 2024 # dir: pydata-tel-avid-2024 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pRsGPxDvLZfuufNgqREc0a # related_urls: # - label: Conference Website # url: 
https://pydata.org/telaviv2024/ # language: eng # dates: # begin: 2024-11-04 # end: 2024-11-04 # default: 2024-11-04 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Paris 2024 # dir: pydata-paris-2024 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pKya8gksd00ennKuyoH7v7 # related_urls: # - label: Conference Website # url: https://pydata.org/paris2024 # language: eng # dates: # begin: 2024-09-25 # end: 2024-09-26 # default: 2024-09-25 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Amsterdam 2024 # dir: pydata-amsterdam-2024 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0reU2lzNZCn9obkyRVaSnpF # related_urls: # - label: Conference Website # url: https://web.archive.org/web/20240822042916/https://amsterdam.pydata.org/ # language: eng # dates: # begin: 2024-09-18 # end: 2024-09-20 # default: 2024-09-18 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Vermont 2024 # dir: pydata-vermont-2024 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pME_xSRdmoYFzhlsHJYM8I # related_urls: # - label: Conference Website # url: https://pydata.org/vermont2024/ # language: 
eng # dates: # begin: 2024-07-29 # end: 2024-07-30 # default: 2024-07-29 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Eindhoven 2024 # dir: pydata-eindhoven-2024 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0q7a2aoNP1au_1egfZEjGL6 # related_urls: # - label: Conference Website # url: https://pydata.org/eindhoven2024/ # language: eng # dates: # begin: 2024-07-11 # end: 2024-07-11 # default: 2024-07-11 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData London 2024 # dir: pydata-london-2024 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0rrhYTNedKKuJ9716fEaAdK # related_urls: # - label: Conference Website # url: https://pydata.org/london2024/ # language: eng # dates: # begin: 2024-06-14 # end: 2024-06-16 # default: 2024-06-14 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Berlin 2024 # dir: pydata-berlin-2024 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0r2tGyr-hjbnCrjXRkCMvwB # related_urls: # - label: Conference Website # url: https://2024.pycon.de/ # language: eng # dates: # begin: 2024-06-14 # end: 2024-06-16 # default: 2024-06-14 # 
minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Global 2023 # dir: pydata-global-2023 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0poULd1C4oUdPbPkTe4poJx # related_urls: # - label: Conference Website # url: https://pydata.org/global2023/ # language: eng # dates: # begin: 2023-12-06 # end: 2023-12-08 # default: 2023-12-06 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Eindhoven 2023 # dir: pydata-eindhoven-2023 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qkbJjIfppGO44yhDV2i4gR # related_urls: # - label: Conference Website # url: https://web.archive.org/web/20240930133013/http://pydata.org/eindhoven2023 # language: eng # dates: # begin: 2023-11-30 # end: 2023-11-30 # default: 2023-11-30 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData New York City 2023 # dir: pydata-new-york-city-2023 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0o79mT1hHyqtFDSNzXXSYQM # related_urls: # - label: Conference Website # url: https://pydata.org/nyc2023/ # language: eng # dates: # begin: 2023-11-01 # end: 2023-11-03 # default: 2023-11-01 # minimal_download: false # 
issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Amsterdam 2023 # dir: pydata-amsterdam-2023 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pADyz2VboxPFIdrsozlENg # related_urls: # - label: Conference Website # url: https://amsterdam2023.pydata.org/cfp/schedule/ # language: eng # dates: # begin: 2023-09-14 # end: 2023-09-16 # default: 2023-09-14 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Seattle 2023 # dir: pydata-seattle-2023 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0q81_-rt5jzJ--ZEgcNArKb # related_urls: # - label: Conference Website # url: https://pydata.org/seattle2023/ # language: eng # dates: # begin: 2023-04-26 # end: 2023-04-28 # default: 2023-04-26 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Berlin 2023 # dir: pydata-berlin-2023 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0peDguAzds7kVmBr8avp46K # related_urls: # - label: Conference Website # url: https://2023.pycon.de/ # language: eng # dates: # begin: 2023-04-17 # end: 2023-04-19 # default: 2023-04-17 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over 
add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Yerevan 2023 # dir: pydata-yerevan-2023 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pJKftCB2BtalTDE-2xS20g # language: eng # dates: # begin: 2023-10-23 # end: 2024-11-07 # default: 2023-10-23 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Trójmiasto 2023 # dir: pydata-trojmiasto-2023 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qS0mI7s9tpXnS-XV5l_Ibs # related_urls: # - label: Conference Website # url: https://www.meetup.com/pl-PL/pydata-trojmiasto/ # language: eng # dates: # begin: 2023-10-24 # end: 2023-10-24 # default: 2023-10-24 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Tel Avid 2022 # dir: pydata-tel-avid-2022 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0p6o_fjjdNPqy1rps49z2S0 # related_urls: # - label: Conference Website # url: https://pydata.org/telaviv2022/ # language: eng # dates: # begin: 2022-12-13 # end: 2022-12-13 # default: 2022-12-13 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - 
description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Eindhoven 2022 # dir: pydata-eindhoven-2022 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0pI60MsrFpHcII1qWm7drmZ # related_urls: # - label: Conference Website # url: https://pydata.org/eindhoven2022/ # language: eng # dates: # begin: 2022-12-02 # end: 2022-12-02 # default: 2022-12-02 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Global 2022 # dir: pydata-global-2022 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qgYUt0yn7F80wmzCnj2dEq # related_urls: # - label: Conference Website # url: https://pydata.org/global2022/ # language: eng # dates: # begin: 2022-12-01 # end: 2022-12-03 # default: 2022-12-01 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData New York City 2022 # dir: pydata-new-york-city-2022 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0opPc5-dp6ZDCFvOqDBlUuv # related_urls: # - label: Conference Website # url: https://pydata.org/nyc2022/ # language: eng # dates: # begin: 2022-11-09 # end: 2022-11-11 # default: 2022-11-09 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - 
tags # - title # tags: # - title: PyData Yerevan 2022 # dir: pydata-yerevan-2022 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qWwVVDmdOw6oxAlqqH8Ca- # related_urls: # - label: Conference Website # url: https://pydata.org/yerevan2022/ # language: eng # dates: # begin: 2022-08-12 # end: 2022-08-13 # default: 2022-08-12 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData London 2022 # dir: pydata-london-2022 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qT0MXnci7VBSF-U-0WaQ-w # related_urls: # - label: Conference Website # url: https://pydata.org/london2022/ # language: eng # dates: # begin: 2022-06-17 # end: 2022-06-19 # default: 2022-06-17 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Berlin 2022 # dir: pydata-berlin-2022 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0p0Fal8_YKg6fPXnf3iPtwD # related_urls: # - label: Conference Website # url: https://2022.pycon.de/ # language: eng # dates: # begin: 2022-04-11 # end: 2022-04-13 # default: 2022-04-11 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Global 2021 # dir: pydata-global-2021 # youtube_list: # 
- https://www.youtube.com/playlist?list=PLGVZCDnMOq0rHb3JXG6puQnUAclFFZMlh # related_urls: # - label: Conference Website # url: https://pydata.org/global2021/ # language: eng # dates: # begin: 2021-10-28 # end: 2021-10-30 # default: 2021-10-28 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Eindhoven 2021 # dir: pydata-eindhoven-2021 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0rBKcoKoaWJiMrDGdNr2_S0 # related_urls: # - label: Conference Website # url: https://pydata.org/eindhoven2021/ # language: eng # dates: # begin: 2021-11-12 # end: 2021-11-12 # default: 2021-11-12 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Eindhoven 2020 # dir: pydata-eindhoven-2020 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qpKjuGgNOgtOxIuATvnqEr # related_urls: # - label: Conference Website # url: https://pydata.org/eindhoven2020/schedule/ # language: eng # dates: # begin: 2020-10-07 # end: 2020-10-09 # default: 2020-10-07 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Amsterdam 2020 # dir: pydata-amsterdam-2020 # youtube_list: # - 
https://www.youtube.com/playlist?list=PLGVZCDnMOq0oX4ymLgldSvpfiZj-S8-fH # related_urls: # - label: Conference Website # url: https://datasciencedistrict.nl/pydata-festival-amsterda/ # language: eng # dates: # begin: 2020-06-15 # end: 2020-06-20 # default: 2020-06-15 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData South Africa 2018 # dir: pydata-south-africa-2018 # youtube_list: # - https://www.youtube.com/watch?v=Lvw3Lp3KrTM&list=PLGjWYNrNnSuc78h5x23A5mLAzWlCl9LGf # related_urls: # - label: Conference Website # url: https://2018.za.pycon.org/ # language: eng # dates: # begin: 2018-10-11 # end: 2018-10-12 # default: 2018-10-11 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # - title: PyData Hamburg 2021 # dir: pydata-hamburg-2021 # youtube_list: # - https://www.youtube.com/playlist?list=PLGVZCDnMOq0qbRG8gBRkosFfhWrObasQF # related_urls: # - label: Conference Website # url: https://www.meetup.com/pydata-hamburg/ # language: eng # dates: # begin: 2020-11-03 # end: 2021-03-03 # default: 2021-03-03 # minimal_download: false # issue: xxx # overwrite: # # all: true # takes precedence over add_new_files and existing_files_fields # add_new_files: true # existing_files_fields: # - duration # - thumbnail_url # - videos # - description # - language # - recorded # - related_urls # - speakers # - tags # - title # tags: # ISO_639-3 language codes https://en.wikipedia.org/wiki/ISO_639-3 # languages = { # 'ita': 'Italian', # 'zho': 
'Chinese', # 'por': 'Portuguese', # 'ukr': 'Ukrainian', # 'deu': 'German', # 'eng': 'English', # 'rus': 'Russian', # 'fra': 'French', # 'spa': 'Spanish', # 'eus': 'Basque', # 'cat': 'Catalan', # 'glg': 'Galician', # 'kor': 'Korean', # 'lit': 'Lithuanian', # 'jpn': 'Japanese', # 'ces': 'Czech', # 'pol': 'Polish', # 'heb': 'Hebrew', # 'tha': 'Thai', # } ~~~ Scraped with [pyvideo_scrape](https://github.com/pyvideo/pyvideo_scrape) --- pydata-global-2024/category.json | 3 ++ ...-duckdb-and-python-pydata-global-2024.json | 43 +++++++++++++++++ ...tching-in-networkx-pydata-global-2024.json | 43 +++++++++++++++++ ...deepfake-detection-pydata-global-2024.json | 43 +++++++++++++++++ ...he-future-of-forecasting-or-just-hype.json | 43 +++++++++++++++++ ...r-vector-databases-pydata-global-2024.json | 43 +++++++++++++++++ ...s-with-statsmodels-pydata-global-2024.json | 43 +++++++++++++++++ ...gents-with-structured-text-generation.json | 43 +++++++++++++++++ ...zero-to-production-pydata-global-2024.json | 43 +++++++++++++++++ ...gression-workflows-pydata-global-2024.json | 43 +++++++++++++++++ ...on-and-scalability-pydata-global-2024.json | 43 +++++++++++++++++ ...ions-and-ship-fast-pydata-global-2024.json | 43 +++++++++++++++++ ...projects-using-nix-pydata-global-2024.json | 43 +++++++++++++++++ ...for-fun-and-profit-pydata-global-2024.json | 43 +++++++++++++++++ ...n-processes-useful-pydata-global-2024.json | 43 +++++++++++++++++ ...models-for-japanese-medical-documents.json | 43 +++++++++++++++++ ...ur-own-transformer-pydata-global-2024.json | 47 +++++++++++++++++++ ...-travel-agent-that-never-hallucinates.json | 43 +++++++++++++++++ ...-decisions-using-r-pydata-global-2024.json | 43 +++++++++++++++++ ...nt-with-pixeltable-pydata-global-2024.json | 43 +++++++++++++++++ ...-track-and-optimize-model-performance.json | 43 +++++++++++++++++ ...ion-in-the-browser-pydata-global-2024.json | 43 +++++++++++++++++ ...th-modelingtoolkit-pydata-global-2024.json | 43 +++++++++++++++++ 
...uncertainty-quantification-with-mapie.json | 43 +++++++++++++++++ ...ython-applications-pydata-global-2024.json | 43 +++++++++++++++++ ...learning-ecosystem-pydata-global-2024.json | 43 +++++++++++++++++ ...ramming-in-data-engineering-workflows.json | 43 +++++++++++++++++ ...rew-a-scaling-saga-pydata-global-2024.json | 43 +++++++++++++++++ ...coherence-of-open-source-eval-metrics.json | 43 +++++++++++++++++ ...i-agents-with-burr-pydata-global-2024.json | 43 +++++++++++++++++ ...y-python-is-a-joke-pydata-global-2024.json | 43 +++++++++++++++++ ...our-optima-combine-pydata-global-2024.json | 43 +++++++++++++++++ ...e-for-data-science-pydata-global-2024.json | 47 +++++++++++++++++++ ...losc2-and-caterva2-pydata-global-2024.json | 47 +++++++++++++++++++ ...usive-load-monitoring-for-iot-devices.json | 43 +++++++++++++++++ ...e-of-two-languages-pydata-global-2024.json | 43 +++++++++++++++++ ...dence-using-duckdb-pydata-global-2024.json | 43 +++++++++++++++++ ...-data-science-in-university-education.json | 43 +++++++++++++++++ ...e-at-massive-scale-pydata-global-2024.json | 43 +++++++++++++++++ ...-on-with-scalable-serverless-analysis.json | 43 +++++++++++++++++ ...-arc-agi-challenge-pydata-global-2024.json | 43 +++++++++++++++++ ...e-not-all-you-need-pydata-global-2024.json | 43 +++++++++++++++++ ...ython-environments-pydata-global-2024.json | 43 +++++++++++++++++ ...ibrary-development-pydata-global-2024.json | 43 +++++++++++++++++ ...els-from-references-to-human-judgment.json | 43 +++++++++++++++++ ...ms-in-r-and-python-pydata-global-2024.json | 43 +++++++++++++++++ ...nfair-bias-in-machine-learning-models.json | 43 +++++++++++++++++ ...cropython-in-1-2-3-pydata-global-2024.json | 43 +++++++++++++++++ ...ics-with-faustream-pydata-global-2024.json | 43 +++++++++++++++++ ...tackling-common-data-challenges-in-ml.json | 43 +++++++++++++++++ ...nt-with-python-101-pydata-global-2024.json | 43 +++++++++++++++++ ...-and-supercharge-your-pydata-workflow.json | 47 
+++++++++++++++++++ ...r-in-our-ai-future-pydata-global-2024.json | 43 +++++++++++++++++ ...on-changepoint-detection-segmentation.json | 43 +++++++++++++++++ ...tion-via-regression-on-shapley-values.json | 43 +++++++++++++++++ ...ove-agricultural-resilience-in-africa.json | 43 +++++++++++++++++ ...n-satellite-images-pydata-global-2024.json | 43 +++++++++++++++++ ...h-polars-deltalake-pydata-global-2024.json | 43 +++++++++++++++++ ...mat-for-multi-modal-ai-data-pipelines.json | 43 +++++++++++++++++ ...g-are-we-there-yet-pydata-global-2024.json | 43 +++++++++++++++++ ...e-simple-by-pycafe-pydata-global-2024.json | 43 +++++++++++++++++ ...impact-of-effort-on-business-outcomes.json | 43 +++++++++++++++++ ...sing-data-prep-kit-pydata-global-2024.json | 47 +++++++++++++++++++ ...d-llm-training-and-inference-pipeline.json | 43 +++++++++++++++++ ...-dataframe-columns-pydata-global-2024.json | 43 +++++++++++++++++ ...-foundational-llms-pydata-global-2024.json | 43 +++++++++++++++++ ...n-apache-spark-4-0-pydata-global-2024.json | 43 +++++++++++++++++ ...ht-in-ci-pipelines-pydata-global-2024.json | 43 +++++++++++++++++ ...build-a-core-ml-platform-from-scratch.json | 43 +++++++++++++++++ ...uals-with-plotnine-pydata-global-2024.json | 43 +++++++++++++++++ ...-the-factory-floor-pydata-global-2024.json | 43 +++++++++++++++++ ...ython-and-cesiumjs-pydata-global-2024.json | 43 +++++++++++++++++ ...can-complete-the-future-of-innovation.json | 43 +++++++++++++++++ ...ents-demo-showcase-pydata-global-2024.json | 43 +++++++++++++++++ ...-models-for-graphs-pydata-global-2024.json | 43 +++++++++++++++++ ...for-live-inference-pydata-global-2024.json | 43 +++++++++++++++++ ...nto-llm-evaluation-pydata-global-2024.json | 43 +++++++++++++++++ ...nd-vector-search-to-empower-retrieval.json | 43 +++++++++++++++++ ...xperimental-design-pydata-global-2024.json | 43 +++++++++++++++++ ...tching-of-large-datasets-using-splink.json | 47 +++++++++++++++++++ 
...-polars-data-types-pydata-global-2024.json | 47 +++++++++++++++++++ ...ronous-programming-pydata-global-2024.json | 43 +++++++++++++++++ ...-open-source-tools-pydata-global-2024.json | 43 +++++++++++++++++ ...lving-role-of-the-r-development-guide.json | 47 +++++++++++++++++++ ...rategies-for-scientists-and-engineers.json | 43 +++++++++++++++++ ...g-pydata-workflows-pydata-global-2024.json | 43 +++++++++++++++++ ...ientific-community-pydata-global-2024.json | 43 +++++++++++++++++ ...oyment-with-kitops-pydata-global-2024.json | 43 +++++++++++++++++ ...tion-in-production-pydata-global-2024.json | 43 +++++++++++++++++ ...nce-engagement-and-higher-conversions.json | 43 +++++++++++++++++ ...st-preference-data-pydata-global-2024.json | 43 +++++++++++++++++ ...sensor-data-in-rag-pydata-global-2024.json | 43 +++++++++++++++++ ...ckage-for-efficient-big-data-analysis.json | 47 +++++++++++++++++++ ...fficiency-personalization-and-insight.json | 43 +++++++++++++++++ ...earning-with-river-pydata-global-2024.json | 43 +++++++++++++++++ ...vernance-solutions-pydata-global-2024.json | 43 +++++++++++++++++ ...a-smaller-data-era-pydata-global-2024.json | 43 +++++++++++++++++ ...terprise-documents-pydata-global-2024.json | 43 +++++++++++++++++ ...identify-urgent-messages-in-real-time.json | 43 +++++++++++++++++ 99 files changed, 4253 insertions(+) create mode 100644 pydata-global-2024/category.json create mode 100644 pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json create mode 100644 
pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json create mode 100644 pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json create mode 100644 pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json create mode 100644 pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json create mode 100644 
pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json create mode 100644 pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json create mode 100644 pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json create mode 100644 pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json create mode 100644 pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json create mode 100644 
pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json create mode 100644 pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json create mode 100644 pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json create mode 100644 pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json create mode 100644 pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json create mode 100644 
pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json create mode 100644 pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json create mode 100644 pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json create mode 100644 pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json create mode 100644 pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json create mode 100644 pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json create mode 100644 pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json create mode 100644 pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json create mode 100644 
pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json create mode 100644 pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json create mode 100644 pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json create mode 100644 pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json create mode 100644 
pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json create mode 100644 pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json create mode 100644 pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json create mode 100644 pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json create mode 100644 pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json create mode 100644 
pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json create mode 100644 pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json create mode 100644 pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json create mode 100644 pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json diff --git a/pydata-global-2024/category.json b/pydata-global-2024/category.json new file mode 100644 index 000000000..6cbf0d254 --- /dev/null +++ b/pydata-global-2024/category.json @@ -0,0 +1,3 @@ +{ + "title": "PyData Global 2024" +} diff --git a/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json new file mode 100644 index 000000000..f9525743d --- /dev/null +++ b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nDuckDB is revolutionizing data processing by enabling in-memory OLAP SQL operations with a lightweight, dependency-free architecture. This talk explores how DuckDB can be leveraged to handle large-scale, massively parallel data processing, ranging from hundreds of gigabytes to terabytes, outside traditional SQL and Spark warehouse systems. 
We will go over the integration with the Python ecosystem and demonstrate its scaling potential using the cloud compute.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1772, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/qSs5ALVbzTk/maxresdefault.jpg", + "title": "Adarsh Namala - Scaling Outside the Warehouse Using DuckDB and Python | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=qSs5ALVbzTk" + } + ] +} diff --git a/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json new file mode 100644 index 000000000..6db0ac731 --- /dev/null +++ b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nHi! Have you ever wished your pure Python libraries were faster? Or wanted to fundamentally improve a Python library by rewriting everything in a faster language like C or Rust? Well, wish no more... NetworkX's backend dispatching mechanism redirects your plain old NetworkX function calls to a FASTER implementation present in a separate backend package by leveraging the Python's entry_point specification!\n\nNetworkX is a popular, pure Python library used for graph(aka network) analysis. 
But when the graph size increases (like a network of everyone in the world), then NetworkX algorithms could take days to solve a simple graph analysis problem. So, to address these performance issues, a backend dispatching mechanism was recently developed. In this talk, we will unveil this dispatching mechanism and its implementation details, and how we can use it just by specifying a backend kwarg like this:\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1746, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/2UkZVKj6QGY/maxresdefault.jpg", + "title": "Aditi Juneja - Understanding API Dispatching in NetworkX | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2UkZVKj6QGY" + } + ] +} diff --git a/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json new file mode 100644 index 000000000..6021e8812 --- /dev/null +++ b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis talk will cover how to use pre-trained HuggingFace models, specifically wav2vec 2.0 and WavLM, to detect audio deepfakes. These deepfakes, made possible by advanced voice cloning tools like ElevenLabs and Respeecher, present risks in areas like misinformation, fraud, and privacy violations. The session will introduce deepfake audio, discuss current trends in voice cloning, and provide a hands-on tutorial for using these transformer-based models to identify synthetic voices by spotting subtle anomalies. 
Participants will learn how to set up these models, analyze deepfake audio datasets, and assess detection performance, bridging the gap between speech generation and detection technologies.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1857, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/MGRmKlDj9rk/maxresdefault.jpg", + "title": "Adriana Stan - Off-the-shelf HuggingFace models for audio deepfake detection | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=MGRmKlDj9rk" + } + ] +} diff --git a/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json b/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json new file mode 100644 index 000000000..69ba56eda --- /dev/null +++ b/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nBeneath the buzz of AI breakthroughs, a quiet revolution is unfolding in the world of forecasting: foundational time series models. These models promise to change the game for operational forecasting, but don\u2019t expect magic. You won\u2019t suddenly become a stock market oracle just by throwing data at them.\n\nIn this talk, we\u2019ll peel back the layers of these new time series models, starting with how they work and how they evolved from transformers. 
We\u2019ll tackle the big problems of limited data and overhyped algorithms, and explore the real-world challenges that make or break forecasts (hint: human input matters).\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1865, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/5Nt0p_3zU7g/maxresdefault.jpg", + "title": "Ahad Shoaib - Foundational Time Series Models in Practice: The Future of Forecasting, or Just Hype?", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=5Nt0p_3zU7g" + } + ] +} diff --git a/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json new file mode 100644 index 000000000..24651e4ea --- /dev/null +++ b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nVector databases are everywhere, powering LLMs. But indexing embeddings, especially multivector embeddings like ColPali and Colbert, at a bulk is memory intensive. Vector streaming solves this problem by parallelizing the tasks of parsing, chunking, and embedding generation and indexing it continuously chunk by chunk instead of bulk. 
This not only increase the speed but also makes the whole task more optimized and memory efficient.\n\nThe library gives many vector database supports, like Pinecone, Weavaite, and Elastic.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1680, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/FdOeLY3rGA8/maxresdefault.jpg", + "title": "Akshay Ballal & Sonam Pankaj-The Memory Efficient Indexing for Vector Databases | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=FdOeLY3rGA8" + } + ] +} diff --git a/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json new file mode 100644 index 000000000..c260dd4a2 --- /dev/null +++ b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nTime series analysis provides essential tools for modeling and predicting time-dependent data, especially data exhibiting seasonal patterns or serial correlation. This tutorial covers tools in the StatsModels library including seasonal decomposition and ARIMA. We'll develop the ARIMA model bottom-up, implementing it one piece at a time, and then using StatsModels. 
As examples, we'll look at weather data and electricity generation from renewable sources in the United States since 2004 -- but the methods we'll cover apply to many kinds of real-world time series data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5376, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/foMbacbuAQk/maxresdefault.jpg", + "title": "Allen Downey - Time Series Analysis with StatsModels | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=foMbacbuAQk" + } + ] +} diff --git a/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json b/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json new file mode 100644 index 000000000..415870272 --- /dev/null +++ b/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nKnowledge graphs are excellent at representing and storing heterogeneous and interconnected information in a structured manner, effectively capturing complex relationships and attributes across different data types.\nStructured text generation allows for building knowledge graphs by providing neatly structured outputs, making it an ideal method for extracting structured information.\nSimilarly, structured text generation enables the creation of agents by defining which tools are allowed and what action inputs are permitted.\nIn this talk, we first build a graph database from unstructured data and 
then we create an agent to query the graph database. We will show these capabilities with a demo.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1696, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/94yuQKoDKkE/maxresdefault.jpg", + "title": "Alonso Silva - Building Knowledge Graph-Based Agents with Structured Text Generation", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=94yuQKoDKkE" + } + ] +} diff --git a/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json new file mode 100644 index 000000000..330da35d5 --- /dev/null +++ b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nTaking any project from zero to production is challenging. 
And Data Science has a particularly high failure rate, with a lot of ideas not getting beyond the prototype stage.\n\nBut there are real reasons for this: there is intrinsic and unknown complexity in data, and there are often big challenges knowing if we have actually solved the problem -- the answer is so rarely \"yes\" or \"no\".\n\nIn this talk I'll cover some key learnings from a decade working on DS problems at early- and later-stage startups, building products to improve product market fit.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1706, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/FA1TWdxoyV4/maxresdefault.jpg", + "title": "Andrew Weeks - Taking Data Science in industry from zero to production | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=FA1TWdxoyV4" + } + ] +} diff --git a/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json new file mode 100644 index 000000000..8bfc685e3 --- /dev/null +++ b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis talk showcases and exemplifies the rapid specification and execution of Quantile Regression workflows. Various use cases are discussed, including fitting, outlier detection, conditional CDFs, and simulations, using different types of time series data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1752, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/Z2uz7kwBli8/maxresdefault.jpg", + "title": "Anton Antonov - Quantile Regression Workflows | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Z2uz7kwBli8" + } + ] +} diff --git a/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json new file mode 100644 index 000000000..24d243b9c --- /dev/null +++ 
b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nLearn how we built a lightning-fast search engine using Python, balancing speed, relevance, and scalability. In this session, we\u2019ll explore our hybrid approach, blending vector search with traditional keyword indexing to deliver high quality, accurate results. Discover how we harness a high-performance NoSQL database for efficient data management and fine-tune our results with a re-ranking algorithm for top-notch accuracy.\nWe\u2019ll dive into the hurdles we overcame, like ensuring data consistency in a NoSQL setup, balancing search precision and performance, and designing a scalable architecture. By the end, you\u2019ll understand how this Python-powered engine works, its real-world applications, and the innovative solutions that set it apart.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1669, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/wn1L3hlYfc0/maxresdefault.jpg", + "title": "Art Anderson - A Deep Dive into Python-Powered Precision and Scalability | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=wn1L3hlYfc0" + } + ] +} diff --git a/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json new file mode 100644 index 000000000..08c1bc9dc --- /dev/null +++ b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nRapid adoption of generative AI requires ensuring your application is trustworthy. Careful experimentation and measurement are necessary for this new era of non-deterministic software. 
In this talk, we will take learnings from 100s of conversations across enterprise AI teams, and discuss how developers can mitigate hallucinations, better inspect their AI systems, and productionize applications with effective guardrails and evaluation checks in place.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1737, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/3iQFdcVf9jI/maxresdefault.jpg", + "title": "Atin Sanyal- Effective GenAI Evaluations: Mitigate Hallucinations and Ship Fast | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=3iQFdcVf9jI" + } + ] +} diff --git a/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json new file mode 100644 index 000000000..332d2652c --- /dev/null +++ b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nAs data scientists and machine learning engineers, it is crucial that we can reproduce results and seamlessly share projects across teams and stakeholders. However, differing operating systems, Python environments, package versions, and package managers often hinder reproducibility across different machines. This talk will explore how Nix can be leveraged to create reproducible work environments and how it can be a convenient tool for any Data Scientist or ML Engineer.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1739, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/QgQzxcPZWxA/maxresdefault.jpg", + "title": "Avik Basu - Reproducible Python projects using Nix | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=QgQzxcPZWxA" + } + ] +} diff --git a/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json new file 
mode 100644 index 000000000..63a6ce2cd --- /dev/null +++ b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nIn this talk, we will explore Judea Pearl\u2019s causal ladder (association, intervention, and counterfactuals) through the lens of a simple demand forecasting model. Using real-world business scenarios, I will demonstrate how to move beyond correlation-based predictions to more actionable decisions using PyMC\u2019s causal inference tools. Attendees will learn how to make forecasts for natural business conditions, simulate the effects of strategic changes (like increased advertising spend), and evaluate the causal impact of past price promotion with retrodictive causal inference.\n\nTarget audience: Data scientists, machine learning engineers, and business analysts looking to improve their decision-making using causal inference.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1798, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/ajLPA34upQY/maxresdefault.jpg", + "title": "Benjamin Vincent - Climbing the causal ladder for fun and profit | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ajLPA34upQY" + } + ] +} diff --git a/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json new file mode 100644 index 000000000..38632d67a --- /dev/null +++ b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThe goal of this tutorial is to make Gaussian processes (GPs) useful. In most practicing data scientists' mental map of modeling and machine learning techniques, Gaussian processes are an advanced approach that sit alone on an island, perhaps with narrow use cases like Bayesian optimization. Most books and other material on GPs tend to focus on theoretical aspects, and it can be hard to close the gap between the theory and putting those ideas into practice to solve real problems in a reasonable amount of time.\n\nThis tutorial is split into two parts. 
The first part introduces Bayesian modeling, focusing on hierarchical modeling and the concept of partial pooling. We\u2019ll use the classic example of estimating the batting average of a group of baseball players as motivation. Then we\u2019ll introduce GPs as a useful generalization of hierarchical modeling for the common situation where our groups aren\u2019t distinct categories. Instead of thinking of each baseball player as completely distinct and exchangeable entities, we can use a GP to partially pool information locally by also considering each player's age. Finally we\u2019ll close the first part by connecting back to the more common introduction to GPs as infinite dimensional multivariate normals.\n\nThe second part of the tutorial will give an overview of practical tips and tricks for modeling with GPs using the open source Python package PyMC. Specifically, how to address the two big issues to using GPs in practice: scaling and identifiability. We\u2019ll discuss useful approximations like the HSGP and when to apply them, advice on when to use splines, and finally when you need to step out of a PPL like PyMC or Stan to a GP specific library like GPFlow or GPyTorch. We\u2019ll do so with a couple motivating examples. The audience should have some familiarity with basic ML and statistics concepts, such as probability distributions, normal and multivariate normal distributions, correlation and covariance, and linear regression - but the talk will aim to be non-technical and the goal will be introduce GPs and give people the tools they need to use them effectively in practice.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5385, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/fi_S89jgUYU/maxresdefault.jpg", + "title": "Bill Engels & Chris Fonnesbeck - Making Gaussian Processes Useful | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=fi_S89jgUYU" + } + ] +} diff --git a/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json b/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json new file mode 100644 index 000000000..e250508fa --- /dev/null +++ 
b/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nTo identify a production-ready, open-source OCR model capable of handling sensitive, non-English content with highly technical language, we evaluated the performance of available open-source OCR models in terms of accuracy, memory efficiency, and processing speed. This presentation will share our findings and key insights gained from this research\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 2197, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/OitWeFVvShc/maxresdefault.jpg", + "title": "Bing Wang - An Evaluation of Open-Source OCR Models for Japanese Medical Documents", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=OitWeFVvShc" + } + ] +} diff --git a/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json new file mode 100644 index 000000000..f5383d8aa --- /dev/null +++ b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json @@ -0,0 +1,47 @@ +{ + "description": "www.pydata.org\n\nColab Notebook Link: https://colab.research.google.com/drive/1faxDHE3LdAwH7MORdnJei87Q0WF1BhS0?usp=sharing\nMake a copy to your local drive to start working on this notebook.\n\nEver wondered how groundbreaking language models like ChatGPT and Llama were built? The answer lies in transformer, a powerful neural network architecture. In this workshop, we'll dive deep into the inner workings of transformers, with specific focus on self-attention mechanism. We will guide you through the process of building one from scratch. 
Whether you're a beginner or an experienced practitioner, this workshop is designed to cater to all levels of expertise.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5337, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://colab.research.google.com/drive/1faxDHE3LdAwH7MORdnJei87Q0WF1BhS0?usp=sharing", + "url": "https://colab.research.google.com/drive/1faxDHE3LdAwH7MORdnJei87Q0WF1BhS0?usp=sharing" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/TWxD76J5Uho/maxresdefault.jpg", + "title": "Borar, Liu, & Shrivastava - Build Your Own Transformer | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=TWxD76J5Uho" + } + ] +} diff --git a/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json b/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json new file mode 100644 index 000000000..343853392 --- /dev/null +++ b/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nLMs offer powerful capabilities, but deploying them effectively in production remains a challenge for conversational AI and Chatbot applications, especially when it comes to minimizing hallucinations and ensuring accurate responses. In this 90-minute hands-on tutorial, we\u2019ll explore building conversational AI systems using CALM and Rasa. 
CALM (Conversational AI Language Model) combines traditional conversational AI techniques with LLMs, separating conversational ability from business logic execution to deliver reliable, cost efficient, and scalable solutions. Unlike LLMs that handle both sides of the conversation, CALM focuses on user understanding with predefined business logic. This approach not only accelerates development but also enhances cost efficiency, scalability and reliability. By focusing on predefined business logic with CALM, you\u2019ll gain the ability to build sophisticated, scalable systems faster. You\u2019ll also learn how to use fine-tuned, open-weight models, such as llama 8b to power your AI assistant.\n\nParticipants will learn how to use CALM for business logic and Rasa for dialogue management, with practical insights, code examples, and best practices. Materials will be provided via a GitHub repository with a GitHub Codespace for easy access and execution.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 4953, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/V7HQCMcaJ8A/maxresdefault.jpg", + "title": "Bowne-Anderson, Nichol, & Petraityt\u0117 - Building an AI Travel Agent That Never Hallucinates", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=V7HQCMcaJ8A" + } + ] +} diff --git a/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json new file mode 100644 index 000000000..5cd90226a --- /dev/null +++ b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nIn partnership with the Department for Environment, Food and Rural Affairs (DEFRA), Datacove developed a bespoke Shiny dashboard designed to enhance decision-making in the areas of Health and Wellbeing, Nature, and Sustainability (HWNS). This presentation explores three key aspects: project and data management, customisation, and usability enhancements in R.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1659, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/Pq5VhosMJQE/maxresdefault.jpg", + "title": "Brookes & Horne - Dashboards to Aid British Government Decisions (using R) | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Pq5VhosMJQE" + } + ] +} diff --git a/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json 
b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json new file mode 100644 index 000000000..432077fa3 --- /dev/null +++ b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis tutorial introduces Pixeltable, which provides data-centric AI infrastructure with a declarative, incremental approach for multimodal workloads. Participants will learn to manage multimodal data (text, images, video) using Pixeltable's declarative interface. We'll cover data versioning, indexing, and orchestration through computed columns and iterators. Attendees will gain practical experience with Pixeltable's integration capabilities and custom UDFs.\n\nRequirements: Python knowledge, basic ML concepts. Materials will be available via a GitHub repository and Google Colab notebooks.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1513, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/C7_nw2Rebfs/maxresdefault.jpg", + "title": "Brunelle & Kornacker - Hands-on Multimodal AI Development with Pixeltable | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=C7_nw2Rebfs" + } + ] +} diff --git a/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json b/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json new file mode 100644 index 000000000..ad8e6af30 --- /dev/null +++ b/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis tutorial empowers deep learning practitioners to master the entire PyTorch workflow, from efficient model creation to advanced tracking and optimization techniques. We'll begin by exploring a practical PyTorch workflow, then delve into integrating popular experiment tracking tools like MLFlow and Weights & Biases. You'll learn to log custom metrics, artifacts, and interactive visualizations, enhancing your model development process. 
Finally, we'll tackle hyperparameter optimization using Optuna's Bayesian search, all while maintaining meticulous experiment tracking for easy comparison and reproducibility.\n\nBy the end of the session, you'll have constructed a robust, modular pipeline for managing experiments and optimizing model performance. Whether you're new to PyTorch or an experienced data scientist looking to improve your workflow, this hands-on tutorial offers immediately applicable insights and techniques to enhance your deep learning projects across diverse domains.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5443, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/pzSzhn9H6X4/maxresdefault.jpg", + "title": "Cain\u00e3 Max Couto da Silva - PyTorch Workflow Mastery: A Guide to Track and Optimize Model Performance", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=pzSzhn9H6X4" + } + ] +} diff --git a/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json new file mode 100644 index 000000000..716f72bcd --- /dev/null +++ b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nLearn how to write a native Python application in the browser using WebAssembly enabled by PyScript.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5925, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/J2XOSdDWPIo/maxresdefault.jpg", + "title": "Chris Laffra - PyScript - Writing a Python application in the browser | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=J2XOSdDWPIo" + } + ] +} diff --git a/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json new file mode 100644 index 000000000..87ee8e7a9 --- /dev/null +++ b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nComponent-based modeling systems 
such as Simulink and Dymola allow for building scientific models in a way that can be composed. For example, Bob can build a model of an engine, and Alice can build a model of a drive shaft, and you can then connect the two models and have a model of a car. These kinds of tools are used all throughout industrial modeling and simulation in order to allow for \"separation of concerns\", allowing experts to engineer their domain and compose the final digital twins with reusable scientific modules. But what about open source? In this talk we will introduce ModelingToolkit, an open source component-based modeling framework that allows for composing pre-built models and scales to large high-fidelity digital twins.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1643, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/yW4oU-7_tGE/maxresdefault.jpg", + "title": "Chris Rackauckas - Open Source Component-Based Modeling with ModelingToolkit | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=yW4oU-7_tGE" + } + ] +} diff --git a/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json b/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json new file mode 100644 index 000000000..0cf7603d1 --- /dev/null +++ b/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nMAPIE (Model Agnostic Prediction Interval Estimator) is your go-to solution for managing uncertainties and risks in machine learning models. 
This Python library, nestled within scikit-learn-contrib, offers a way to calculate prediction sets with controlled coverage rates for regression and classification tasks.\n\nBut it doesn't stop there - MAPIE can also be used to handle more complex tasks like time series analysis, multi-label classification, computer vision and natural language processing, ensuring probabilistic guarantees on crucial metrics.\n\nJoin us as we delve into the world of conformal predictions and how to quickly manage your uncertainties using MAPIE.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5056, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/ZkLIWS9dlZI/maxresdefault.jpg", + "title": "Cordier, Jawad, & Laurent - Boosting AI Reliability: Uncertainty Quantification with MAPIE", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ZkLIWS9dlZI" + } + ] +} diff --git a/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json new file mode 100644 index 000000000..48e061fd8 --- /dev/null +++ b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nShiny for Python is an efficient and reactive application framework that will be able to grow with your application needs. As your shiny application grows, you may find yourself needing more custom behaviors and potentially reusing and sharing your custom behaviors with others. \nYou may also find your existing applications to be overly complex and had to see the overall structure of the application. 
Here are some tips on writing better Shiny Applications and leveling up your code.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1841, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/2Cst7_s_4H8/maxresdefault.jpg", + "title": "Daniel Chen - Tips to Level-Up Your Shiny for Python Applications | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2Cst7_s_4H8" + } + ] +} diff --git a/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json new file mode 100644 index 000000000..47666e2d4 --- /dev/null +++ b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nJulia is a high-performance language for technical computing that offers advantages like type stability, just-in-time compilation, and extensive parallel computing support. Its Machine Learning ecosystem, although having fewer options, is functional and includes packages like DataFrames.jl, Flux.jl, MLJ.jl, and SciML for various ML tasks. Additional tools cover data visualization, R compatibility, and specific ML applications. The ecosystem is comprehensive and can meet many ML researcher/professional needs. 
This talk provides an overview of the ecosystem, discussing both its strengths and potential areas for improvement.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1672, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/17Jm_Gqv3K8/maxresdefault.jpg", + "title": "Daniel Molina - Discover the Julia Machine Learning Ecosystem | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=17Jm_Gqv3K8" + } + ] +} diff --git a/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json b/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json new file mode 100644 index 000000000..7f463a8ae --- /dev/null +++ b/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nStreamlining clinical trial output workflows is a key challenge in clinical studies. To deliver reports to health authorities, clinical trial statisticians need to create several scripts to produce deliverables such as output datasets, tables, figures, and listings. Statisticians must also handle specific execution orders to respect dependencies between the generated datasets.\n\nOur project leverages Python programming to automatically generate orchestration workflows from clinical trial project metadata using the Snakemake framework. 
Snakemake supports the execution of multiple jobs using Docker containers, facilitating multilingual orchestration. This enables our users to run end-to-end (E2E) data engineering workflows using their preferred programming languages, primarily SAS and R. Moreover, Snakemake allows parallel runs for efficient workflow management.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1793, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/7xrDlgaz-QM/maxresdefault.jpg", + "title": "Daphn\u00e9 Grasselly - Enabling Multi-Language Programming in Data Engineering Workflows", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=7xrDlgaz-QM" + } + ] +} diff --git a/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json new file mode 100644 index 000000000..fed617708 --- /dev/null +++ b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis talk will tell the tale of how we migrated a data application from Streamlit to Panel. And what it took to scale from 100 users to 2000+ users in less than 2 months. It's a story of pain, Kubernetes, resilience, and a whole lot of Python\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1817, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/xDcGtPgxXEk/maxresdefault.jpg", + "title": "Duarte Carmo - Panel: The Dashboard That Grew - A Scaling Saga | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=xDcGtPgxXEk" + } + ] +} diff --git a/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json b/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json new file mode 100644 index 000000000..7970c3e2e --- /dev/null +++ 
b/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nRetrieval-Augmented Generation (RAG), despite being a superstar of GenAI over the last year, comes with a plethora of challenges and is prone to errors. Open Source Python libraries like RAGAS and TruLens provide frameworks for evaluating RAG systems, using various metrics that leverage LLMs to assess performance. But when using LLM in a RAG system is in itself a source of errors, it remains to be seen how reliable it would be to use another LLM, allthebit a more powerful one, as a judge of the RAG performance. This study explores various RAG evaluation metrics, as well as the choice of evaluator LLM, to examine the reliability and consistency of LLM-based evaluations. The aim is to provide practical insights and guidance for interpreting these evaluations effectively, and help users make informed decisions when applying them in diverse contexts.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1807, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/hCCJoJ5URD0/maxresdefault.jpg", + "title": "El Mawass & Neeman - Evaluating RAGs: On the correctness and coherence of Open Source eval metrics", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=hCCJoJ5URD0" + } + ] +} diff --git a/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json new file mode 100644 index 000000000..e2f0d7c35 --- /dev/null +++ b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nIn this talk we present the OS library Burr -- a tool that makes it easier to build reliable, production-ready AI applications and agents. We will show how to use Burr to address a host of production concerns problems including generating test data from prior runs, interactive debugging, persisting/loading application state, and more\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1648, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/3Ks02G18anA/maxresdefault.jpg", + "title": "Elijah ben Izzy - Build Production Ready AI Agents with Burr | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=3Ks02G18anA" + } + ] +} diff --git a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json new file mode 100644 index 000000000..b921ddec6 --- /dev/null +++ b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nEnjoy 
some data-driven laughs with Evan Wimpey, a data and analytics comedian (and we're not just talking about his coding skills). No data topic is off-limits, so come enjoy some of the funniest jokes ever told at a data conference.*\n\n*Note the baseline\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1699, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/iJp12vplXAc/maxresdefault.jpg", + "title": "Evan Wimpey - Python is a Joke! 
| PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=iJp12vplXAc" + } + ] +} diff --git a/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json new file mode 100644 index 000000000..fa7e22833 --- /dev/null +++ b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nAn introduction to solving combinatorial optimization and constraint satisfaction problems in Python. I will review the most popular libraries for SAT/CSP. We will then deep dive to a crash corse on using Google's award winning OR-tools library, for efficiently solving some non-trivial real-world constrained combinatorial optimization problems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1741, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/bl13uhchJVA/maxresdefault.jpg", + "title": "Eyal Gruss - Let our optima combine! | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=bl13uhchJVA" + } + ] +} diff --git a/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json new file mode 100644 index 000000000..cdc60cffe --- /dev/null +++ b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json @@ -0,0 +1,47 @@ +{ + "description": "www.pydata.org\n\nTo apply or not to apply, that is the question.\n\nCausal reasoning elevates predictive outcomes by shifting from \u201cwhat happened\u201d to \u201cwhat would happen if\u201d. Yet, implementing causality can be challenging or even infeasible in some contexts. This talk explores how the very act of assessing its applicability can add value to your projects. 
Through a gentle introduction to causal inference tools and practical use cases, you will learn how to bring greater scientific rigour to real-world problems.\n\nTarget audience: Practicing and aspiring data scientists, machine learning engineers, and analysts looking to improve their decision-making with causal inference.\n\nNo prior knowledge is assumed.\n\nFor the seasoned practitioners I hope to shine light on aspects that may not have been considered. \ud83d\udca1\n\nCan't make the talk? Read all about it in my new TDS article: \ud83e\udde0\ud83e\uddf9 Causality \u2014 Mental Hygiene for Data Science (http://bit.ly/causal-hygiene)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1756, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "http://bit.ly/causal-hygiene", + "url": "http://bit.ly/causal-hygiene" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/POMePoP8M-w/maxresdefault.jpg", + "title": "Eyal Kazin - \ud83e\udde0\ud83e\uddf9 Causality - Mental Hygiene for Data Science | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=POMePoP8M-w" + } + ] +} diff --git a/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json new file mode 100644 index 000000000..184ff85f6 --- /dev/null +++ b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json @@ -0,0 +1,47 @@ +{ + "description": "www.pydata.org\n\nAs data grows larger and more complex, efficient storage and processing become critical to achieving scalable and high-performance computing. Blosc2 (https://www.blosc.org), a powerful meta-compressor library, addresses these challenges by enabling rapid compression and decompression of large, multidimensional arrays (NDArrays). 
This tutorial will introduce the core concepts of working with Blosc2, focusing on how it can be leveraged to optimize both storage and computational performance in Python.\n\nAttendees will learn how to:\n\n Efficiently create and manage large NDArrays, including options for persistence.\n Select the best codecs and filters for specific data types and workflows to achieve optimal compression ratios and performance.\n Perform computations directly on compressed data to save memory and speed up processing.\n Seamlessly share NDArrays using Caterva2, a versatile library designed to enable remote sharing and serving of multidimensional datasets.\n\nThis tutorial is ideal for Python developers working with large-scale data in scientific computing, machine learning, and other data-intensive fields.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5220, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://www.blosc.org", + "url": "https://www.blosc.org" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/aR-i_a3nGx0/maxresdefault.jpg", + "title": "Francesc Alted - Mastering Large NDArray Handling with Blosc2 and Caterva2 | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=aR-i_a3nGx0" + } + ] +} diff --git a/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json b/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json new file mode 100644 index 000000000..fe53e5f83 --- /dev/null +++ b/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nNon-Intrusive Load Monitoring (NILM) is a key technique in data-driven energy management and home automation, aimed at disaggregating energy consumption to identify active appliances in households and quantify their energy usage. 
This presentation:\n\n Provides an overview of NILM, highlighting its advantages and reviewing state-of-the-art deep learning algorithms developed for this purpose.\n Examines smart meters and IoT devices in energy systems, with a focus on the Chain2 protocol used in Italian energy systems. This event-based protocol generates low-volume data, enabling real-time energy monitoring and alerting.\n Presents examples of deep learning models trained on real-world IoT sensor data from energy meters, demonstrating their application in energy disaggregation.\n\nThis session offers an insightful overview of real-world deep learning applications in energy systems. While tailored for data scientists and data engineers interested in these fields, no prior knowledge is required. Join to explore how these technologies are driving energy optimization, cost reduction, and enhancing personal energy consumption awareness.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1796, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/cMtYqUqdzsA/maxresdefault.jpg", + "title": "Francesco Conti - Deep Learning in Energy Management: Non-Intrusive Load Monitoring for IoT Devices", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=cMtYqUqdzsA" + } + ] +} diff --git a/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json new file mode 100644 index 000000000..9bc28e316 --- /dev/null +++ b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis talk is an introduction to automatic differentiation with a focus on the Python and Julia ecosystems. We will first explain what autodiff is and how it works, then describe its various implementations in both languages. Our goal is to give everyone a good understanding of how computer code can be differentiated, while also discussing the trade-offs this differentiability entails.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1727, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/4sF-Wm8w31c/maxresdefault.jpg", + "title": "Guillaume Dalle - Automatic differentiation, a tale of two languages | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=4sF-Wm8w31c" + } + ] +} diff --git a/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json 
b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json new file mode 100644 index 000000000..7429ee669 --- /dev/null +++ b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nChanging data is hard: The computer may crash, scripts could fail, and data structures could be changing. Relational data management systems provide transactional (\u201cACID\u201d) guarantees that can be immensely useful for data analysis. DuckDB provides all-or-nothing semantics for changes to datasets and is robust against failures of any kind. In this talk, we will illustrate the usefulness DuckDB\u2019s transactional facilities to bring sanity to changes to data analysis workflows in Python.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1823, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/7UqLMHloTsQ/maxresdefault.jpg", + "title": "Hannes M\u00fchleisen - Changing Data With Confidence using DuckDB | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=7UqLMHloTsQ" + } + ] +} diff --git a/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json b/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json new file mode 100644 index 000000000..5bf49b48a --- /dev/null +++ b/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis proposal aims to develop a Python curriculum for data science for multidisciplinary studies in university education. Data Science is nowadays a trending topic in any area like social science, finance, natural science and so many others. Therefore, every student in the university education is keen to learn data science using computer languages rather than using SPSS or other traditional data analysis tools especially related to research. 
So, this aims to develop a new curriculum for any student studying from any discipline in higher education to learn data science using trending techniques and tools. Python is the core programming language here because it is very widely used and related to data science field. Plus, it has many advantages like easy to learn and use, platform independence used, large and active community support. Utilizing Bloom\u2019s Taxonomy as the guiding framework has developed a new curriculum for four-year degree programs to succeed in data driven world considering multidisciplinary approach. In this curriculum, students can start from Python basic programming concepts to progress to advanced analyzing techniques using libraries like Pandas, NumPy, and Seaborn, and platforms such as Anaconda and Google Colab and finally build own projects in that students related discipline. Ultimately this curriculum will leverage success in Data-centric society in domain specific applications.\n\nKeywords: Bloom\u2019s, curriculum, multidisciplinary, python, science, taxonomy\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1756, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/r3Diqvfy4Fo/maxresdefault.jpg", + "title": "Hansila Sudasinghe - PYDATA Bloom Framework: An Approach to Data Science in University Education", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=r3Diqvfy4Fo" + } + ] +} diff --git a/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json new file mode 100644 index 000000000..85d287d6d --- /dev/null +++ b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nDoing geoscience is hard. It\u2019s even harder if you have to figure out how to handle large amounts of data!\n\nXarray is an open-source Python library designed to simplify the handling of labeled multi-dimensional arrays, like raster geospatial data, making it a favorite among geoscientists. 
It allows these scientists to easily express their computations, and is backed by Dask, a Python library for parallel and distributed computing, to scale computations to entire clusters of machines.\n\nPeople love using Xarray on Dask for geospatial workloads, but only up to about the terabyte scale. At this point, the stack can struggle, requiring expertise to work well and frustrating users and developers alike.\n\nTo address this and enable the Dask \u2764\ufe0f Xarray stack to scale to hundreds of terabytes, we have recently designed a suite of large-scale geospatial benchmarks. With the help of these benchmarks, we are able to understand what limits performance within Dask and Xarray, and to address these issues.\nIn this talk, we will explore how Dask integrates with libraries like Xarray and Zarr to scale geospatial workloads and other multi-dimensional array computations.\n\nWe will also dive deeper into some of the bottlenecks in the Dask \u2764\ufe0f Xarray stack that our benchmarks revealed, as well as some of the recent improvements we have made in these areas. With the help of our benchmark suite, we then assess the impact of these changes.\n\nJoin us to discover how Dask helps you scale geoscience workloads from your laptop to the cloud.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1771, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/KJxJRx7KQtc/maxresdefault.jpg", + "title": "Hendrik Makait - Dask \u2764\ufe0f Xarray: Geoscience at Massive Scale | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=KJxJRx7KQtc" + } + ] +} diff --git a/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json new file mode 100644 index 000000000..0e300b288 --- /dev/null +++ b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nA beginner level hands-on introduction to BigQuery DataFrames. Please bring your laptop! There is nothing to install in advance\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5325, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/2D5-7zIeOQ4/maxresdefault.jpg", + "title": "Hsia, Swena & Williams- Python + BigQuery + DataFrames: Hands on with scalable \"serverless\" analysis", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2D5-7zIeOQ4" + } + ] +} diff --git a/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json 
b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json new file mode 100644 index 000000000..b4d2b5b76 --- /dev/null +++ b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nHaving worked on Kaggle's LLM-based ARC AGI program-writing challenge for 6 months using Llama3, I'll give reflections on the lessons learned making an automatic program generator, evaluating it, coming up with strong representations for the challenge, chain-of-thought and program-of-thought styles and some multi-stage critical thinking approaches. You'll get tips for tuning your own prompts and shortcuts to help you evaluate your own LLM usage with greater assurance in the face of non-deterministic outcomes.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1742, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/ft_PYi8A93M/maxresdefault.jpg", + "title": "Ian Ozsvald - Valuable LLM lessons learnt on Kaggle's ARC AGI Challenge | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ft_PYi8A93M" + } + ] +} diff --git a/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json new file mode 100644 index 000000000..9791b8e04 --- /dev/null +++ b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\n9 out of 10 engineers will recommend the use of evaluation tools for their LLMs, but admit they only trust eyeballing responses to decide whether it's safe to use. 
The 10th carefully studies the floor in silence.\n\nThis talk is for engineers, developers or applied researchers who may or may not know of evaluation tools and metrics, but either way benefit from an overview of different risks in applications using LLMs for text generation, Open Source libraries they can use to mitigate these risks, and examples of how to use them.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1660, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/o3dBxo6fgcA/maxresdefault.jpg", + "title": "Irina Vidal Migall\u00f3n - Trustworthy LLMs: Vibe checks are not all you need | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=o3dBxo6fgcA" + } + ] +} diff --git a/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json b/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json new file mode 100644 index 000000000..ea354eb3d --- /dev/null +++ b/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nDebugging software itself is a hard task, but debugging GPU software environments can be even more challenging. Understanding the intricate interactions between hardware, drivers, CUDA, C++ dependencies, and Python libraries can be far more complex.\n\nIn this talk we will dig into how these different layers interact and how you can address some of the common pitfalls that folks run into when configuring GPU Python environments. 
We will also introduce a new tool, RAPIDS Doctor, that aims to take the challenge out of ensuring your software environments are in good shape. RAPIDS Doctor checks and diagnoses environmental health issues straight from the command line, ensuring that your setup is fully functional and optimized for performance.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1645, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/ghq-VDNvNss/maxresdefault.jpg", + "title": "Jacob Tomlinson & Melody Wang- The art of wrangling your GPU Python environments |PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ghq-VDNvNss" + } + ] +} diff --git a/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json new file mode 100644 index 000000000..826f8185b --- /dev/null +++ b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nDue to its high-level syntax and powerful interactive prompt, Julia is typically used as a computational front-end language. However there is growing interest in using Julia to develop statically-compiled libraries to be called from other languages (Python, C++, etc.). I will present recent and ongoing work happening in the Julia community to enable this use case, including building smaller binaries and static analysis tooling.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1843, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/LluyXFj9YDI/maxresdefault.jpg", + "title": "Jeff Bezanson - Statically-Compiled Julia for Library Development | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=LluyXFj9YDI" + } + ] +} diff --git a/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json 
b/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json new file mode 100644 index 000000000..76e7381a7 --- /dev/null +++ b/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nIn the rapidly evolving field of natural language processing, the evaluation of large language models (LLMs) is crucial for understanding their performance and guiding their development. This talk delves into the two primary evaluation methodologies: reference-based and reference less techniques.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1933, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/ObFGLVEPYoc/maxresdefault.jpg", + "title": "Jhaveri & Joshi - Holistic Evaluation of Large Language Models: From References to Human Judgment", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ObFGLVEPYoc" + } + ] +} diff --git a/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json new file mode 100644 index 000000000..163cc77a8 --- /dev/null +++ b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis talk will explain how to solve business forecasting problems using time series methods. Time series forecasting remains a specialty topic. Because of this you really want to use a package tuned for your use case and specialized to deal with the difficulties inherent in time series forecasting. I will share a simplified problem notation that helps you select between time series packages in R and Python\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1740, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/7H44aJuK0Yg/maxresdefault.jpg", + "title": "John Mount - Solving Forecasting Problems in R and Python | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=7H44aJuK0Yg" + } + ] +} diff --git a/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json 
b/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json new file mode 100644 index 000000000..ec104ed37 --- /dev/null +++ b/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nIn this 90-minute workshop, machine learning engineers and data scientists will learn practical techniques for identifying and mitigating age bias in AI-driven hiring systems. We\u2019ll explore fairness metrics like statistical parity, counterfactual fairness, and equalized odds, and demonstrate how tools such as Fairlearn, Aequitas, and IBM Fairness 360 can be used to monitor and improve model fairness. Through hands-on exercises, participants will walk away with the skills to evaluate and de-bias models in high-risk areas like recruitment.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5415, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/G1I45toaVSs/maxresdefault.jpg", + "title": "John Sandall - Fairness Tales: How To Measure And Mitigate Unfair Bias in Machine Learning Models", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=G1I45toaVSs" + } + ] +} diff --git a/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json new file mode 100644 index 000000000..44465c30f --- /dev/null +++ b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nLearn to build powerful sensors running on low-cost microcontrollers, all in Python!\n\nDid you known that (Micro)Python can scale all the way down to microcontrollers\nthat have less than 1 MB of RAM and program memory? Such devices can cost just a few dollars, and are widely used to measure, log, analyze and react to physical phenomena. 
This enables a wide range of useful and fun applications - be it for a smart home, wearables, scientific measurements, consumer products or industrial solutions.\n\nIn this talk, we will demonstrate how to get started with MicroPython on a ESP32 microcontroller.\nWe will first show how to create a basic Internet-connected sensor node using simple analog/digital sensors. And then we will show how to create advanced sensors that use Digital Signal Processing and Machine Learning to analyze microphone, accelerometer or camera data\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1716, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/nCmBJJHGQKo/maxresdefault.jpg", + "title": "Jon Nordby - Microcontrollers + Machine Learning with MicroPython in 1-2-3 | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=nCmBJJHGQKo" + } + ] +} diff --git a/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json new file mode 100644 index 000000000..5b29d9a47 --- /dev/null +++ b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nFaustream is an open-source tool I developed that bridges the gap between streaming data and real-time predictive analytics. This talk explores how Faustream leverages Python, Kafka, and Faust to handle high-velocity data streams while applying machine learning models in real-time. We'll dive into its architecture, key features, and applications, demonstrating how it can revolutionize data processing across industries.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1063, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/exHRTSGZtAo/maxresdefault.jpg", + "title": "Joseph Oladokun-Bridging the Gap: Real-Time Predictive Analytics with Faustream | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=exHRTSGZtAo" + } + ] +} diff --git a/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json 
b/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json new file mode 100644 index 000000000..388d39a00 --- /dev/null +++ b/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nData quality is a crucial factor that significantly impacts the performance of machine learning models. However, many data scientists often overlook or underestimate the hidden costs associated with poor data quality. This talk will highlight common data challenges, and discuss their implications for model accuracy and reliability. Attendees will learn practical strategies to identify, assess, and improve data quality, ensuring their machine learning projects yield better results.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1861, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/2ugMOAYwLpQ/maxresdefault.jpg", + "title": "Kalyan Prasad - The Hidden Costs of Data Quality - Tackling Common Data Challenges in ML", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2ugMOAYwLpQ" + } + ] +} diff --git a/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json new file mode 100644 index 000000000..e4cdd2c59 --- /dev/null +++ b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nWriting GPU code in Python is easier today than ever, and in this tutorial, we will cover how you can get started with accelerating your code.\n\nYou don't need to learn C++ and you don't need new development tools.\n\nAttendees will be expected to have a general knowledge of Python and programming concepts, but no GPU experience will be necessary. Our key takeaway for attendees will be the knowledge that they don\u2019t have to do much differently to get their code running on a GPU\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5341, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/rfXgtUYF3lw/maxresdefault.jpg", + "title": "Katrina Riehl & Jacob Tomlinson - GPU development with Python 101 | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=rfXgtUYF3lw" + } + ] +} diff --git a/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json 
b/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json new file mode 100644 index 000000000..385a12988 --- /dev/null +++ b/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json @@ -0,0 +1,47 @@ +{ + "description": "www.pydata.org\n\nDiscover why the Unix command line remains a powerful and relevant tool for data scientists, even in a Python-dominated landscape. This talk will demonstrate how embracing the command line and leveraging its many tools can significantly enhance your productivity, streamline data workflows, and complement your Python skills.\n\nJeroen Janssens, PhD, is a polyglot data science consultant and certified instructor. His expertise lies in visualizing data, implementing machine learning models, and building solutions using Python, R, JavaScript, and Bash. Jeroen is passionate about open source and sharing knowledge. He is the author of Data Science at the Command Line (O\u2019Reilly, 2021) and is currently writing Python Polars: The Definitive Guide (O\u2019Reilly, 2025). Every now and then he blogs at https://jeroenjanssens.com.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3266, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://jeroenjanssens.com.", + "url": "https://jeroenjanssens.com." + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/siPGvvrfylQ/maxresdefault.jpg", + "title": "KEYNOTE: Dr. 
Jeroen Janssens - Embrace the Unix Command Line and Supercharge Your PyData Workflow", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=siPGvvrfylQ" + } + ] +} diff --git a/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json new file mode 100644 index 000000000..189c31b24 --- /dev/null +++ b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nJoin us for an exciting keynote from Peter Wang\n\nIn this talk, Peter walks through some of the most interesting learnings from the last few years of AI, as well as lessons learned over the last decade of Python's adoption for data science, in an effort to answer the question, \"What is the role of the open data science movement in the era of AI?\"\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 3786, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/3hSjftUjmWk/maxresdefault.jpg", + "title": "KEYNOTE: Peter Wang - Do Python and Data Science Matter in Our AI Future? | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=3hSjftUjmWk" + } + ] +} diff --git a/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json b/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json new file mode 100644 index 000000000..98f37b608 --- /dev/null +++ b/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nskchange is a python compatible framework library for detecting anomalies, changepoints in time series, and segmentation.\n\nskchange is based on and extends sktime, the most widely used scikit-learn compatible framework library for learning with time series. 
Both packages are maintained under permissive license, easily extensible by anyone, and interoperable with the python data science stack.\n\nThis workshop gives a hands-on introduction to the new joint detection interface developed in skchange and sktime, for detecting point anomalies, changepoints, and segment anomalies, in unsupervised, semi-supervised, and supervised settings.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5362, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/VwhevNkxjYw/maxresdefault.jpg", + "title": "Kiraly, Risi, & Tveten - sktime: time series anomaly detection, changepoint detection, segmentation", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=VwhevNkxjYw" + } + ] +} diff --git a/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json b/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json new file mode 100644 index 000000000..10bf08203 --- /dev/null +++ b/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nFeature selection is an essential process in machine learning, especially when dealing with high-dimensional datasets. It helps reduce the complexity of machine learning models, improve performance, mitigate overfitting, and decrease computation time. This talk will present a novel open source feature selection framework, shap-select.\nShap-select is noteworthy because of its simplicity - it requires only one fit of the model for which one does feature selection, and yet performs comparably to much heavier methods. 
It conducts a linear or logistic regression of the target on the Shapley values of the features, on the validation set, and uses the signs and significance levels of the regression coefficients to implement an efficient heuristic for feature selection in tabular regression and classification tasks.\nWe compare this to several other methods, showing that shap-select combines interpretability, computational efficiency, and performance, offering a robust solution for feature selection, especially for real-world cases where model fitting is computationally expensive.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1625, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/pmqvyrIyB_8/maxresdefault.jpg", + "title": "Koseoglu & Kraev - Fast, intuitive feature selection via regression on Shapley values", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=pmqvyrIyB_8" + } + ] +} diff --git a/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json b/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json new file mode 100644 index 000000000..a3cb09678 --- /dev/null +++ b/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nAs the climate changes, farmers in Africa are facing enormous challenges, from unpredictable rainfall to shifting growing seasons. In this session, I will share how we can use machine learning (ML) models, built on open-source platforms like TensorFlow and Google Earth Engine, to predict crop yields for key staples such as maize and cassava. By looking at case studies from Kenya, Ghana, and Malawi, I'll show how ML is helping farmers decide when to plant, manage resources more efficiently, and reduce climate risks. 
I\u2019ll also talk about practical tools\u2014like community hubs, radio broadcasts, and SMS alerts\u2014that ensure even non-literate farmers can use these insights. Expect to walk away with actionable ideas on how to implement these techniques in your own work on food security.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1675, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/wMwEmlhyYh0/maxresdefault.jpg", + "title": "Kristal Joi Wise - Harnessing Machine Learning to Improve Agricultural Resilience in Africa", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=wMwEmlhyYh0" + } + ] +} diff --git a/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json new file mode 100644 index 000000000..3f415e70b --- /dev/null +++ b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis talk will uncover the power of AI in combating Amazon deforestation through an innovative cattle detection system. We present a cutting-edge approach to monitoring illegal ranching, a primary driver of deforestation, using very high-resolution satellite imagery and deep learning. We'll dive into the unique challenges of detecting cattle from space \u2013 from congested scenes with small, clustered targets to diverse and cluttered backgrounds \u2013 and how we overcame them with a two-step neural network approach. 
By combining classification and density estimation techniques, our model efficiently identifies potential cattle locations and estimates herd sizes across varied landscapes. Discover how this interdisciplinary project, developed in collaboration with Brazilian prosecutors, leverages data science to drive real-world impact in environmental conservation and sustainable land management. Join us to explore the intersection of computer vision, geospatial analysis, and environmental advocacy, and learn how AI can be a powerful tool in the fight against deforestation in the Amazon and beyond.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1790, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/1uex29BVbgg/maxresdefault.jpg", + "title": "Leonie Hodel - Using AI to Spot Deforestation-related Cows on Satellite Images | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=1uex29BVbgg" + } + ] +} diff --git a/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json new file mode 100644 index 000000000..bc20ae1a5 --- /dev/null +++ b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nData scientists in the real world have to manage messy datasets that evolve over time. New data must be added, old data must be removed and changes to columns must be handled gracefully. Furthermore, many real world datasets grow from a size that works on a laptop to a size that must run on a server. 
This talk will show that in Python we can meet all these challenges in a simple and scalable way using the delta-rs package to manage the data storage and Polars to read and write the dataset.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1710, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/ZIrq9GsN2HM/maxresdefault.jpg", + "title": "Liam Brannigan - Build simple & scalable data pipelines with Polars & DeltaLake | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ZIrq9GsN2HM" + } + ] +} diff --git a/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json b/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json new file mode 100644 index 000000000..267c4fa31 --- /dev/null +++ b/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nBy unifying PySpark's robust big data processing/analyzing capability with Lance's multimodal AI data lake, data engineers and scientists can efficiently manage and analyze the diverse data types required for cutting-edge AI applications within a familiar big data framework.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1617, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/noZNcpYRrkk/maxresdefault.jpg", + "title": "Lu Qiu & Allison Wang - Empowering PySpark with Lance Format for Multi-Modal AI Data Pipelines", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=noZNcpYRrkk" + } + ] +} diff --git a/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json new file mode 100644 index 000000000..b40c4c4b5 --- /dev/null +++ 
b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nTransformers are everywhere: NLP, Computer Vision, sound generation and even protein-folding. Why not in forecasting? After all, what ChatGPT does is predicting the next word. Why this architecture isn't state-of-the-art in the time series domain?\n\nIn this talk, you will understand how Amazon Chronos and Salesforece's Moirai transformer-based forecasting models work, the datasets used to train them and how to evaluate them to see if they are a good fit for your use-case.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1251, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/DZICL_8vdXI/maxresdefault.jpg", + "title": "Luca Baggi - Foundational Models for Time Series Forecasting: are we there yet? | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=DZICL_8vdXI" + } + ] +} diff --git a/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json new file mode 100644 index 000000000..60cd2cc37 --- /dev/null +++ b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nWhat if deploying a Python app was as simple as a single click, and came at zero cost? With PyCafe, you can offer users live, interactive examples of your libraries or have them submit reproducible examples when reporting issues.\nBuilt on top of Pyodide, PyCafe runs countless web frameworks (e.g. streamlit, dash, panel, gradio) directly in the browser. 
By making apps easy to create, share, and edit, PyCafe opens up new workflows, including possibilities we may not have even imagined yet.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1653, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/-adJy4MxZgE/maxresdefault.jpg", + "title": "Maarten Breddels - Python Apps in the Browser made simple by PyCafe | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=-adJy4MxZgE" + } + ] +} diff --git a/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json b/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json new file mode 100644 index 000000000..bd9ec9345 --- /dev/null +++ b/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nHow do you know when a user experience isn\u2019t hitting the mark? Do you wait for it to show up in qualitative feedback? Do you have a long list of different metrics that you have to keep track of that could potentially signal a problem? When evaluating user experiences, how can you quantify if it\u2019s a good experience or not? Additionally, how do you know if your good or bad experience is impacting other areas of the business?\n\nThese are common problems for product managers and the data scientists and analysts who support them. 
To solve them, I propose creating an aggregate metric that represents the effort or friction experienced by your users - a User Effort Index.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1748, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/2-0iWgVC2oc/maxresdefault.jpg", + "title": "Maggie Wolff - Measuring the User Experience and the Impact of Effort on Business Outcomes", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2-0iWgVC2oc" + } + ] +} diff --git a/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json new file mode 100644 index 000000000..22eb9afaa --- /dev/null +++ b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json @@ -0,0 +1,47 @@ +{ + "description": "www.pydata.org\n\nData Prep Kit (https://github.com/IBM/data-prep-kit) is a new open source python library to help you wrangle and clean your data for generative AI applications (de-dupe, detect language, removing PII, detect malware, creating embeddings, etc.)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 4647, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://github.com/IBM/data-prep-kit", + "url": "https://github.com/IBM/data-prep-kit" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/GVA1XK0jrf8/maxresdefault.jpg", + "title": "Maniyam & Nielsen - Preparing Data for LLM Applications Using Data Prep Kit | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=GVA1XK0jrf8" + } + ] +} diff --git a/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json b/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json new file mode 100644 
index 000000000..67c41412d --- /dev/null +++ b/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nHave you ever wanted to understand LLM internals such as pre-training, supervised fine-tuning, instruction-tuning, reinforcement learning with human feedback, parameter efficient fine-tuning, expanding LLM context lengths, attention mechanism variants, model deployment performance, and cost optimization, which GPUs to use when and more? This talk will take an end-to-end review of the LLM training and deployment pipeline to give you both a stronger intuition and a faster path to implementation using model training and deployment frameworks.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1773, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/V2L6hufE2X4/maxresdefault.jpg", + "title": "Mark Moyou, PhD - Understanding the end-to-end LLM training and inference pipeline", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=V2L6hufE2X4" + } + ] +} diff --git a/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json new file mode 100644 index 000000000..298a91ed3 --- /dev/null +++ b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nWe present \u201cakimbo\u201d, a library bringing a numpy-like API and vector-speed processing to dataframes on the CPU or GPU. When your data is more complex than simple one-dimensional columns, this is the most natural way to perform selection, mapping and aggregations without iterating over python objects, saving a large factor in memory and processing time.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1787, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/thfNEGCuwbY/maxresdefault.jpg", + "title": "Martin Durant- akimbo: vectorized processing of nested/ragged dataframe columns | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=thfNEGCuwbY" + } + ] +} diff --git a/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json 
b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json new file mode 100644 index 000000000..9ebeecc0e --- /dev/null +++ b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\n\"What training data do you need, don't you just train on the whole internet?\"\n\"Doesn't data production rely heavily on outsourcing to cheap labour markets in the Global South?\"\n\"Isn't all training data just synthetic nonsense generated by LLMs nowadays, how can you expect a model to learn anything worthwhile?\"\n\nThese are all questions that I regularly get, when I tell people I work on building foundational LLMs. Because as often as we use LLMs in our daily lives nowadays, people generally know very little of the data that went into the LLM to train it.\n\nIn this talk, I'll address these questions and hope to build an understanding of what it takes to build an LLM from scratch, from a data perspective.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1670, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/uV3HLROlcLM/maxresdefault.jpg", + "title": "Marysia Winkels - The Data That Shapes Foundational LLMs | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=uV3HLROlcLM" + } + ] +} diff --git a/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json new file mode 100644 index 000000000..b023975c7 --- /dev/null +++ b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThe upcoming release of Apache Spark 4.0 delivers substantial enhancements that refine the functionality and augment the developer experience with the Spark unified analytics engine.\n\nAttendees will learn how to use Apache Spark 4.0's advancements for optimized data processing and analytics\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1735, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/zBy3-NiylY8/maxresdefault.jpg", + "title": "Matthew Powers - New Features in Apache Spark 4.0 | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=zBy3-NiylY8" + } + ] +} diff --git a/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json 
b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json new file mode 100644 index 000000000..c62b8d21c --- /dev/null +++ b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\n\u201cI like waiting for my build jobs,\u201d said no one ever. CI is an essential part of ensuring quality, helping to highlight new issues before they might be merged into the main codebase. CI gives us confidence that the code changes being proposed don\u2019t break things, as least as far as our tests cover. That confidence comes at the cost of time and compute resources.\n\nThe RAPIDS team at NVIDIA manages its own operations and compute resources. Those resources are limited, of course, so we wait our turn and put the toys back when we\u2019re done.. It is essential to us that we are using our resources as efficiently as possible. This is the \u201cSpeed of Light\u201d principle at NVIDIA: how close are you to a theoretical optimal limit? For CI, this involves several factors: startup wait time, docker image setup time, cache utilization, build tool processes, and limiting unnecessary redoing builds and tests for things that haven\u2019t changed. The RAPIDS team set out to add telemetry to all of our builds, so that we can quantify where we are spending our time and compute resources, and ensure that we are spending them wisely. We\u2019ll demonstrate the telemetry tools that we\u2019re using, and show how you can add them to your build jobs.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1784, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/fasxVtDQgK0/maxresdefault.jpg", + "title": "Michael Sarahan - Going Plaid: Striving for Speed of Light in CI pipelines | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=fasxVtDQgK0" + } + ] +} diff --git a/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json b/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json new file mode 100644 index 000000000..9a4b76351 --- /dev/null +++ 
b/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis hands-on tutorial guides participants through the process of constructing the essential components of a Machine Learning Platform (MLP) from scratch. We'll focus on implementing five core elements: a feature store, model registry, orchestrator, inference engine, and basic monitoring system. The session emphasizes practical, hands-on coding using Test-Driven Development (TDD), Domain Driven Design, and hexagonal architecture principles providing attendees with a functional foundation for a robust ML infrastructure.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5359, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/XkNeCavaJtw/maxresdefault.jpg", + "title": "Nathan Colbert - From Inference to Features: Build a Core ML Platform from Scratch", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=XkNeCavaJtw" + } + ] +} diff --git a/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json new file mode 100644 index 000000000..9e6fe8fed --- /dev/null +++ b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nPlotnine is a Python library that implements the Grammar of Graphics, enabling users to create complex, layered plots. This talk covers techniques for customising your plots, using time series data as an example, and highlights how plotnine integrates with matplotlib, allowing you to enhance your data visualisations for better storytelling.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 2062, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/NBGJuaBF2rc/maxresdefault.jpg", + "title": "Nicola Rennie - Practical Techniques for Polished Visuals with Plotnine | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=NBGJuaBF2rc" + } + ] +} diff --git a/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json 
b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json new file mode 100644 index 000000000..ae6f52217 --- /dev/null +++ b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nTenova, as an innovative engineering company, collaborates closely with its client-partners to create advanced technologies and services that optimize business operations.\n\nThis talk discusses the deployment of our image recognition system to identify and mitigate potential hazards on steel plants, specifically focusing on the detection of bulky steel pieces.\nThe system was deployed on-premise using an edge device and an IP camera, supported by Azure IoT Edge and a Flask API for image processing and prediction.\nA recent migration to a RabbitMQ-based architecture using Pika enhanced scalability and communication.\n\nThe presentation will cover technical strategies, the challenges (like offline functionality and real-time, low-latency hazard detection) and the positive impact of the system on workplace safety and operational efficiency.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1441, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/G8ypUIlvlEg/maxresdefault.jpg", + "title": "Nicol\u00f2 Giso - Image Recognition for safety on the factory floor | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=G8ypUIlvlEg" + } + ] +} diff --git a/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json new file mode 100644 index 000000000..4301647e1 --- /dev/null +++ b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nGeospatial data is more important than ever for tackling real-world challenges like urban planning and climate change. This tutorial teaches you how to use tools like CesiumJS and Python to turn raw data into interactive 3D visuals. 
It\u2019s a hands-on way to bring data to life and try to make an impact.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5389, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/-6vh5vMgPHA/maxresdefault.jpg", + "title": "Nompumelelo Mtsweni- 3D geospatial data visualization using Python and Cesiumjs | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=-6vh5vMgPHA" + } + ] +} diff --git a/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json b/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json new file mode 100644 index 000000000..3c3134749 --- /dev/null +++ b/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis talk focuses on the underrepresentation of women in AI and data science, where only 22% of AI professionals are women. We will explore how addressing the missing 78% is critical to creating inclusive, innovative solutions that benefit society as a whole. 
Attendees will learn about the current challenges women face, the importance of diverse perspectives in AI development, and actionable strategies for empowering women in the field through community engagement, mentorship, and data-driven policies.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1789, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/2k72xRc67wE/maxresdefault.jpg", + "title": "Noor Aftab - The Missing 78%: How Women in AI & Data Can Complete the Future of Innovation", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=2k72xRc67wE" + } + ] +} diff --git a/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json b/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json new file mode 100644 index 000000000..507602fc0 --- /dev/null +++ b/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nJoin us to celebrate the innovative minds behind NumHack 2024!\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 2542, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/UPSGvCtKYGE/maxresdefault.jpg", + "title": "NumHack 2024: Winners Announcements & Demo Showcase | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=UPSGvCtKYGE" + } + ] +} diff --git a/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json new file mode 100644 index 000000000..674f971c4 --- /dev/null +++ b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nGraphRAG is a popular way to use KGs to ground AI apps. Most GraphRAG tutorials use LLMs to build graph automatically from unstructured data. 
However, what if you're working on use cases such as investigative journalism and sanctions compliance -- \"catching bad guys\" -- where transparency for decisions and evidence are required?\n\nThis talk explores how to leverage open data, open models, and open source to build investigative graphs which are accountable, exploring otherwise hidden relations in the data that indicate fraud or corruption. This illustrates techniques used in production use cases for anti-money laundering (AML), ultimate beneficial owner (UBO), rapid movement of funds (RMF), and other areas of sanctions compliance in general.\n\nThis approach uses Python open source libraries, e.g., the K\u00f9zuDB graph database and LanceDB vector database. For each NLP task we use state-of-the-art open models (mostly not LLMs) emphasizing how to tune for a domain context: named entity recognition, relation extraction, textgraph, entity linking, as well as entity resolution to merge structured data and produce a semantic overlay that organizes the graph.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1750, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/Nrsh6LzUk6A/maxresdefault.jpg", + "title": "Paco Nathan - Catching Bad Guys using open data and open models for graphs | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Nrsh6LzUk6A" + } + ] +} diff --git a/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json new file mode 100644 index 000000000..3585deec0 --- /dev/null +++ b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nCSP is a newly open-sourced library for stream processing in Python. In this talk, we discuss how CSP can be leveraged to handle all stages of an online machine learning pipeline from feature generation to live training and inference\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1721, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/f5G8OVuRI3k/maxresdefault.jpg", + "title": "Pascal Tomecek - Leveraging CSP for Live Inference | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=f5G8OVuRI3k" + } + ] +} diff --git a/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json new file mode 100644 index 000000000..65f8ba627 --- /dev/null +++ 
b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nMany organizations are eager to build and deploy their own large language models (LLMs), but validating them can feel frustrating and incomplete. Fortunately, as data scientists we are experts in model diagnostics, and we can extend these same principles to LLM validation. In this talk, I will present a scientific approach to evaluating custom text generation models in Python across several dimensions such as safety, coherence, and correctness.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1763, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/vxSRIL1WD9g/maxresdefault.jpg", + "title": "Patrick Deziel - Putting the data science back into LLM evaluation | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=vxSRIL1WD9g" + } + ] +} diff --git a/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json b/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json new file mode 100644 index 000000000..23b626a4e --- /dev/null +++ b/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis talk will go over an application scenario that brings together the benefits of vector search with graph traversal. Knowledge graphs (or more generally, graphs), have long been used to model structured data that capture the connection between entities in the real world. Recently, there has been a lot of interest in the topic of Graph RAG, which aims to use graphs as part of the retrieval process in RAG, to enhance the outcomes. 
The talk will cover a practical example to showcase how Python developers can leverage the PyData ecosystem alongside two open source, embedded databases: K\u00f9zu for the graph component, and LanceDB for the vector component of the retrieval.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1787, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/ky2yufsffas/maxresdefault.jpg", + "title": "Prashanth Rao - Graph RAG: Bringing together graph and vector search to empower retrieval", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ky2yufsffas" + } + ] +} diff --git a/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json new file mode 100644 index 000000000..3f61dc751 --- /dev/null +++ b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nUnlike stylized machine learning examples in textbooks and lectures, data are often not readily available to be used to train models and gain insight in real-world applications; instead, practitioners are required to collect those data themselves.\nHowever, data annotation can be expensive (in terms of time, money, or some safety-critical conditions), thus limiting the amount of data we can possibly obtain.\n(Examples include eliciting an online shopper's preference with ads at the risk of being intrusive, or conducting an expensive survey to 
understand the market of a given product.)\nFurther, not all data are created equal: some are more informative than others.\nFor example, a data point that is similar to one already in our training set is unlikely to give us new information; conversely, a point that is different from the data we have thus far could yield novel insight.\nThese considerations motivate a way for us to identify the most informative data points to label and gain knowledge in a way that makes use of our labeling budget as effectively as possible.\nBayesian experimental design (BED) formalizes this framework, leveraging the tools from Bayesian statistics and machine learning to answer the question: which data point is the most valuable that should be labeled to improve our knowledge?\n\nThis talk serves as a friendly introduction to BED including its motivation as discussed above, how it works, and how to implement it in Python.\nDuring our discussions, we will show that interestingly, binary search, a popular algorithm in computer science, is a special case of BED.\nData scientists and ML practitioners who are interested in decision-making under uncertainty and probabilistic ML will benefit from this talk.\nWhile most background knowledge necessary to follow the talk will be covered, the audience should be familiar with common concepts in ML such as training data, predictive models, and common probability distributions (normal, uniform, etc.)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1878, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/zssnoI2JvTo/maxresdefault.jpg", + "title": "Quan Nguyen - Cost-effective data annotation with Bayesian experimental design | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=zssnoI2JvTo" + } + ] +} diff --git a/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json b/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json new file mode 100644 index 000000000..a250c5d0f --- /dev/null +++ b/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json @@ -0,0 +1,47 @@ +{ + "description": "www.pydata.org\n\nData deduplication is a ubiquitous data quality problem that most data people will encounter at some point in their career. 
It happens whenever multiple records are collected about the same person or other entity without a unique identifier that ties these records together.\n\nThis talk provides beginners with everything they need to start linking and deduping large datasets using Splink (https://github.com/moj-analytical-services/splink), a free Python library.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1644, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://github.com/moj-analytical-services/splink", + "url": "https://github.com/moj-analytical-services/splink" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/eQtFkI8f02U/maxresdefault.jpg", + "title": "Robin Linacre - Rapid deduplication and fuzzy matching of large datasets using Splink", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=eQtFkI8f02U" + } + ] +} diff --git a/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json new file mode 100644 index 000000000..b29bf6c8e --- /dev/null +++ b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json @@ -0,0 +1,47 @@ +{ + "description": "www.pydata.org\n\nPolars (https://github.com/pola-rs/polars) boasts 18 different data types, not including variants of numerical types.\n\nDo we really need such a vast collection of data types?\n\nWhat is the use case for each type?\n\nWhat is the difference between List and Array? Or between Categorical and Enum? 
And why on Earth would I ever need a Struct?\n\nThis talk will clear up all of these questions and more, as we go through the data types that Polars provides and understand why we need each one of them.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1829, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/pola-rs/polars", + "url": "https://github.com/pola-rs/polars" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/8HwfVVknhP4/maxresdefault.jpg", + "title": "Rodrigo Gir\u00e3o Serr\u00e3o - Understanding Polars data types | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=8HwfVVknhP4" + } + ] +} diff --git a/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json new file mode 100644 index 000000000..4795695da --- /dev/null +++ b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nAsynchronous programming can be intimidating for many due to its unique syntax, paradigm, and different behavior in environments like IPython and Jupyter notebooks.\n\nBut it\u2019s not that complicated\u2014and I'll prove it. In this talk, I will demystify the basics, along with some advanced concepts, from a practical perspective. 
By the end, you'll be ready to get started and implement significant performance improvements in your network or I/O-bound code.\n\nAttend this talk if you\u2019ve been intimidated by async and await for a while and are ready to change that.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1808, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/oy7sEAfJsWw/maxresdefault.jpg", + "title": "Ryan Varley - Let's get you started with asynchronous programming | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=oy7sEAfJsWw" + } + ] +} diff --git a/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json new file mode 100644 index 000000000..184e8b89a --- /dev/null +++ b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nLarge Language Models are great at writing and chatting, but are they also able to talk like a human? Today, modern LLM-based voice bots can listen to users, talk back to them with a realistic voice, handle interruptions and improvise, while sticking to the goal they're given by their builders. And this is not only true for the latest, eye-watering expensive OpenAI's models! In this session we will learn how modern voice bots are made, which open source tools are available to build them, and we are going to see in practice how to build one. 
At the end of the session, the demo's full source code will be shared with the audience.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1925, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/Td5dFdG0wE4/maxresdefault.jpg", + "title": "Sara Zanzottera - Building LLM Voice Bots with Open Source Tools | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Td5dFdG0wE4" + } + ] +} diff --git a/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json b/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json new file mode 100644 index 000000000..38482438d --- /dev/null +++ b/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json @@ -0,0 +1,47 @@ +{ + "description": "www.pydata.org\n\nThe R Development Guide (R Dev Guide) (https://contributor.r-project.org/rdevguide/) serves as a resource for onboarding new contributors to the R project. Initially drafted in 2021 and then expanded during the Google Season of Docs 2022, the guide has evolved to make contributing more accessible, especially for newcomers. 
This talk will explore the latest developments in the guide, its impact on the R community, and how it fosters inclusivity within the project by simplifying the contribution process.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1192, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://contributor.r-project.org/rdevguide/", + "url": "https://contributor.r-project.org/rdevguide/" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/l5BwwvQlwG0/maxresdefault.jpg", + "title": "Saranjeet Kaur Bhogal - Empowering New Contributors: The Evolving Role of the R Development Guide", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=l5BwwvQlwG0" + } + ] +} diff --git a/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json b/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json new file mode 100644 index 000000000..660613bce --- /dev/null +++ b/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nData rules the world and data-scientists / MLEs across academia and industry are creating new and innovative ways to glean insights which have changed our lives through easy to understand and intuitive interfaces. 
At the heart of the AI / ML revolution ( genAI, LLMs, bioinformatics, climate science etc ) is the availability and elasticity of state of the art hardware which enables processing large swaths of data ( TBs ) that could not run on local laptops for want of compute/memory. Cloud providers have commoditized these powerful machines to the extent that they are now available to every person with a few clicks.\n\nCloud computing allows us to tradeoff upfront hardware costs for granular operational expenses such as renting GPUs by the second. Prima facie this might seem like a winning formula, a key downside is that these costs often add up uncontrollably. Attributing the usage of such hardware to Data/AI/ML jobs across dimensions like cloud accounts, instances, workloads down to the lowest level of granularity, can help provide transparency to not only cost albeit resource management as well.\n\nThrough our work with open-source Metaflow, which started at Netflix in 2017, we have had an opportunity to help customers place their cloud spend in the context of value produced by individual projects combined with more granular resource management to limit spend.\n\nIn this talk, we will provide an overview of the lessons we have learnt in our quest to get a better handle on costs by using Metaflow. 
We will share best practices to consider when writing AI/ML workloads and how constructs in the Metaflow framework can be used to answer questions Data-Scientists/MLE\u2019s ask themselves such as:\n\nHow do my cloud costs break down over time and what workloads/cloud instances are driving these costs?\nAre the workloads executing tuned to allow maximum usage of these expensive resources?\nHow can I refactor my workloads such that the expensive resources are used to their optimal capacity?\nIn particular, we'll focus on best practices to follow when working with large datasets in a distributed multi cloud / cluster environments, and how Metaflow constructs can help achieve that in a human friendly manner, with very few lines of code.\n\nThe audience will be empowered to build and deploy production-grade Data/AI/ML pipelines while learning strategies on how to optimize workloads to keep expensive ML/AI operations under control. Finally, the audience will have the tools to answer questions like \u201cAm I using my resources to their fullest extent? If not, what are the opportunities for tuning my AI/ML jobs resource requirements, to bin pack hardware and subsequently reduces overall costs\u201d\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1813, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/YYZ6vcumojo/maxresdefault.jpg", + "title": "Saurabh Garg - Navigating Cloud Expenses in Data & AI: Strategies for Scientists and Engineers", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=YYZ6vcumojo" + } + ] +} diff --git a/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json new file mode 100644 index 000000000..4a170421b --- /dev/null +++ b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nWhat if designing data workflows felt like snapping together LEGO blocks? In this talk, we\u2019ll explore how open-source tools enable flexible, modular PyData workflows. 
We\u2019ll discuss why open source is essential for avoiding vendor lock-in and how to integrate libraries and frameworks within the Python ecosystem, alongside tools like GitHub Actions. Plus, I\u2019ll introduce DataJourney, an open-source toolkit I developed that makes designing workflows as fun and creative as building with LEGO.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1829, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/KdgegsH3rAQ/maxresdefault.jpg", + "title": "Sayantika Banik - The LEGO Approach to designing PyData Workflows | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=KdgegsH3rAQ" + } + ] +} diff --git a/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json new file mode 100644 index 000000000..190bf0eb8 --- /dev/null +++ b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThe nvmath-python is a new way of delivering NVIDIA accelerated Math Libraries to Python users: researchers-practitioners, library and framework developers, and optimized GPU kernel developers. In this talk we will provide an introduction to the library design goals, its architecture, overview of the key features along with its usage examples.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1646, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/ABao7JTDTMI/maxresdefault.jpg", + "title": "Sergey Maydanov - Bringing NVIDIA math libraries to Python scientific community | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ABao7JTDTMI" + } + ] +} diff --git a/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json 
b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json new file mode 100644 index 000000000..ccb6608ec --- /dev/null +++ b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nAs organizations increasingly integrate and adopt AI and machine learning internally, the challenge of maintaining separate pipelines for ML-powered systems and conventional software makes it difficult for DevOps teams to maintain these separate pipelines. This talk explores a unified approach to DevOps and MLOps, demonstrating how existing DevOps pipelines can be transformed into efficient MLOps pipelines using ModelKits with KitOps\n\nWe'll begin by examining the reasons behind the traditional separation of DevOps and MLOps pipelines, including differences in project nature, required expertise, and the size and complexity of artifacts. We'll then delve into the challenges posed by separate pipelines, such as increased costs, coordination difficulties, and accumulating technical debt. Thus the attendees will learn how to leverage open source tooling like KitOps to create a unified pipeline that accommodates both traditional software and ML-powered projects, ultimately leading to more efficient and cost-effective operations.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1822, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/eExvPkSqTCQ/maxresdefault.jpg", + "title": "Shivay Lamba - Streamlining AI development and Deployment with KitOps | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=eExvPkSqTCQ" + } + ] +} diff --git a/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json new file mode 100644 index 000000000..5e56577eb --- /dev/null +++ b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nAnomaly detection is hardly a new problem, nor is the progress in it as rapid as the LLM blast we\u2019re witnessing today. 
But it is pressing.\n\nIn this talk, we\u2019ll talk about a realtime anomaly detection pipeline on time series data and discuss the nitty-gritties of the algorithm knobs that help us build an unbiased and reliable system, which includes 1) using NeuralProphet, an open source framework, to forecast for time series data and 2) using robust techniques to detect true anomalies using forecasting errors.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1807, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/ca4w2ZIZ0S0/maxresdefault.jpg", + "title": "Shreya Khurana - Realtime Time Series Anomaly Detection in Production | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=ca4w2ZIZ0S0" + } + ] +} diff --git a/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json b/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json new file mode 100644 index 000000000..30d3003e4 --- /dev/null +++ b/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThe paid search landscape is undergoing a remarkable transformation, evolving from traditional keyword-centric strategies to a more nuanced approach that prioritizes audience targeting. This shift is not just a trend; it\u2019s a response to the ever-increasing demand for precision and effectiveness in reaching potential customers in a crowded digital marketplace.\n\nAt the forefront of this evolution is our innovative automated system designed to identify high-intent users through sophisticated batch processing of their website behaviour. 
By harnessing the power of machine learning, we create a dynamic layer that curates smarter audiences those that closely resemble our most valuable converted customers. This enables us to execute precise retargeting campaigns that not only drive meaningful engagement but also optimize marketing budgets, resulting in enhanced audience selection and significantly higher conversion rates.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1685, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/d1SaxtX7L6E/maxresdefault.jpg", + "title": "Shrikanth Singh - Automating SEA Retargeting for Smarter Audience Engagement and Higher Conversions", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=d1SaxtX7L6E" + } + ] +} diff --git a/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json new file mode 100644 index 000000000..0bfa5eafd --- /dev/null +++ b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nThis talk explores how to align large language models (LLMs) with human values via preference learning (PL) in the presence of challenges such as incomplete and corrupted data in preference datasets. We propose a novel method for recalibrating values to tackle these issues, enhancing LLM resilience by improving the robustness of existing models. 
The session highlights real-world experiments that show how the method addresses adversarial noise and unobserved comparisons, making it essential for building more reliable, ethically aligned AI systems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1921, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/E01yrxkmWvM/maxresdefault.jpg", + "title": "Son The Nguyen- Improve LLMs Alignment with Complete and Robust Preference Data | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=E01yrxkmWvM" + } + ] +} diff --git a/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json new file mode 100644 index 000000000..c22bb64e6 --- /dev/null +++ b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nToday we will learn how to build an application around sensor data, REST Feeds, weather data, traffic cameras and vector data. We will write a simple Python application to collect various structured, semistructured data and unstructured data, We will process, enrich, augment and vectorize this data and insert it into a Vector Database to be used for semantic hybrid search and filtering. We will then build a Jupyter notebook to analyze, query and return this data.\n\nAlong the way we will learn the basics of Vector Databases and Milvus. 
While building it we will see the practical reasons we choose what indexes make sense, what to vectorize, how to query multiple vectors even when one is an image and one is text. We will see why we do filtering. We will then use our vector database of Air Quality readings to feed our LLM and get proper answers to Air Quality questions. I will show you how to all the steps to build a RAG application with Milvus, LangChain, Ollama, Python and Air Quality Reports. Finally after demos I will answer questions, provide the source code and additional resources including articles.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 5385, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/IJuzKZdiLCg/maxresdefault.jpg", + "title": "Timothy Spann - It's in the Air Tonight. Sensor Data in RAG | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=IJuzKZdiLCg" + } + ] +} diff --git a/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json b/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json new file mode 100644 index 000000000..2f06c302b --- /dev/null +++ b/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json @@ -0,0 +1,47 @@ +{ + "description": "www.pydata.org\n\ndata.table is an R package with C code that is one of the most efficient open-source in-memory data manipulation packages available today. First released to CRAN by Matt Dowle in 2006, it continues to grow in popularity, and now over 1500 other CRAN packages depend on data.table. 
This talk will start with data reading from CSV, discuss basic and advanced data manipulation topics, and finally will end with a discussion about how you can contribute to data.table.\n\nhttps://github.com/tdhock/2023-10-LatinR-data.table?tab=readme-ov-file#english\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 4098, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + }, + { + "label": "https://github.com/tdhock/2023-10-LatinR-data.table?tab=readme-ov-file#english", + "url": "https://github.com/tdhock/2023-10-LatinR-data.table?tab=readme-ov-file#english" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/l_7FXnppu-g/maxresdefault.jpg", + "title": "Toby Dylan Hocking- Using and contributing to the data.table package for efficient big data analysis", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=l_7FXnppu-g" + } + ] +} diff --git a/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json b/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json new file mode 100644 index 000000000..c711c1545 --- /dev/null +++ b/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nGenerative AI is revolutionizing industries by enhancing efficiency, personalization, and insight. This talk explores how a robust Python ecosystem, including Streamlit, various libraries, and APIs, is harnessed to build powerful generative AI applications. 
Attendees will gain insights into the practical implementation of these technologies and their transformative impact on business operations.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 2159, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/I7jVu-fHavI/maxresdefault.jpg", + "title": "Tony Ojeda - Generative AI + Python: Unlocking Efficiency, Personalization, and Insight", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=I7jVu-fHavI" + } + ] +} diff --git a/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json new file mode 100644 index 000000000..257a546ce --- /dev/null +++ b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nLearn how to get started on your online ML journey with River, an open source Python ML library. The foundations of machine learning were built on offline batch processing techniques for model training and inference. As organisations become more dependent on real-time data, the technological trend for machine learning in production is moving towards adding an online stream processing approach. 
This has benefits such as lower computational requirements due to being able to incrementally learn from a stream of data points, which enables the continual upgrading of models by adapting to real-time changes in data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1815, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/GhDRKUT9gZA/maxresdefault.jpg", + "title": "Tun Shwe - Moving from Offline to Online Machine Learning with River | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=GhDRKUT9gZA" + } + ] +} diff --git a/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json new file mode 100644 index 000000000..7e0d3ea20 --- /dev/null +++ b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nAs large language models (LLMs) become increasingly integrated into industries like finance, healthcare, and law, ensuring their responsible deployment is critical\u2014particularly in highly regulated environments. These industries face unique challenges, including data privacy, compliance with strict regulations, and minimizing the risks of biased or untrustworthy outputs.\n\nThis session will explore the complexities of using LLMs in regulated industries and present a governance framework to address these challenges. 
We'll cover practical solutions for deploying LLMs while adhering to industry-specific regulations, ensuring transparency, reducing bias, and maintaining data privacy. Attendees will learn how to implement governance best practices at various stages of the LLM lifecycle\u2014from model training and validation to deployment and ongoing monitoring.\n\nDrawing on real-world examples and lessons learned, this talk will equip data scientists, machine learning engineers, and AI leaders with actionable strategies for navigating regulatory compliance and minimizing risks, while still harnessing the full potential of LLMs to drive innovation.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1455, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/__VU52cv6jk/maxresdefault.jpg", + "title": "Vyoma Gajjar- LLMs in Regulated Industries: Challenges and Governance Solutions | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=__VU52cv6jk" + } + ] +} diff --git a/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json new file mode 100644 index 000000000..0b8d3a97f --- /dev/null +++ b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nIn this talk, I will offer my perspective on the modern data tools landscape and in particular user-facing tools for interactive data science and data exploration. The latest trends of composable data systems and embeddable query engines like DuckDB and DataFusion create both challenges and opportunities to create a more coherent and productive stack of tools for both end user data scientists and developers building data systems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1804, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/w4aYrav8-zE/maxresdefault.jpg", + "title": "Wes McKinney - Retooling for a Smaller Data Era | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=w4aYrav8-zE" + } + ] +} diff --git a/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json new file 
mode 100644 index 000000000..63c027b62 --- /dev/null +++ b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nTraditional document processing for Retrieval-Augmented Generation (RAG) often involves cumbersome, error-prone extraction pipelines, hampering AI's ability to retrieve high-quality information from complex formats like PDFs and PowerPoint decks. ColPali disrupts this process by embedding entire pages\u2014text, visuals, and layout\u2014into rich, multi-vector representations using Vision Language Models (VLMs). This talk explores how ColPali, paired with multimodal models like the Llama 3.2 Vision series, enables RAG systems to \u201csee\u201d and reason over documents, dramatically improving retrieval performance. Attendees will learn to implement ColPali for enhanced, scalable, and robust enterprise knowledge retrieval.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 2151, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/pnacsAWnjV8/maxresdefault.jpg", + "title": "Zain Hasan - ColPali\u2019s Vision-Powered RAG for Enterprise Documents | PyData Global 2024", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=pnacsAWnjV8" + } + ] +} diff --git a/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json b/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json new file mode 100644 index 000000000..0af722340 --- /dev/null +++ b/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json @@ -0,0 +1,43 @@ +{ + "description": "www.pydata.org\n\nProviding timely maternal healthcare in developing countries is a critical challenge. This talk demonstrates how data-driven solutions can bridge healthcare gaps and improve access to vital healthcare information for pregnant women, with user privacy in mind. To do so, we fine-tuned the Gemma-2 2 billion parameter instruction model on a synthetic dataset in order to detect whether user messages pertain to urgent or non-urgent maternal healthcare issues. 
By quickly identifying and prioritizing user inquiries, the model can aid help desks by ensuring urgent messages are promptly forwarded to the appropriate healthcare professionals for immediate intervention.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "duration": 1809, + "language": "eng", + "recorded": "2024-12-03", + "related_urls": [ + { + "label": "Conference Website", + "url": "https://pydata.org/global2024" + }, + { + "label": "https://github.com/numfocus/YouTubeVideoTimestamps", + "url": "https://github.com/numfocus/YouTubeVideoTimestamps" + } + ], + "speakers": [ + "TODO" + ], + "tags": [ + "Education", + "Julia", + "NumFOCUS", + "Opensource", + "PyData", + "Python", + "Tutorial", + "coding", + "how to program", + "learn", + "learn to code", + "python 3", + "scientific programming", + "software" + ], + "thumbnail_url": "https://i.ytimg.com/vi/Lb0ecRiz4xE/maxresdefault.jpg", + "title": "Zhen (Tony) Zhao - Training Language Models to Identify Urgent Messages in Real-Time", + "videos": [ + { + "type": "youtube", + "url": "https://www.youtube.com/watch?v=Lb0ecRiz4xE" + } + ] +} From 86870df799a519a5fedb88bd99fb5c3029da53e3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?= <14986783+ELC@users.noreply.github.com> Date: Fri, 27 Jun 2025 01:53:11 +0000 Subject: [PATCH 2/7] Remove tags --- ...ng-duckdb-and-python-pydata-global-2024.json | 17 +---------------- ...patching-in-networkx-pydata-global-2024.json | 17 +---------------- ...o-deepfake-detection-pydata-global-2024.json | 17 +---------------- ...-the-future-of-forecasting-or-just-hype.json | 17 +---------------- ...for-vector-databases-pydata-global-2024.json | 17 +---------------- ...sis-with-statsmodels-pydata-global-2024.json | 17 +---------------- ...-agents-with-structured-text-generation.json | 17 +---------------- ...m-zero-to-production-pydata-global-2024.json | 17 +---------------- ...regression-workflows-pydata-global-2024.json | 17 +---------------- ...sion-and-scalability-pydata-global-2024.json | 17 +---------------- ...ations-and-ship-fast-pydata-global-2024.json | 17 +---------------- ...n-projects-using-nix-pydata-global-2024.json | 17 
+---------------- ...r-for-fun-and-profit-pydata-global-2024.json | 17 +---------------- ...ian-processes-useful-pydata-global-2024.json | 17 +---------------- ...r-models-for-japanese-medical-documents.json | 17 +---------------- ...your-own-transformer-pydata-global-2024.json | 17 +---------------- ...ai-travel-agent-that-never-hallucinates.json | 17 +---------------- ...nt-decisions-using-r-pydata-global-2024.json | 17 +---------------- ...ment-with-pixeltable-pydata-global-2024.json | 17 +---------------- ...to-track-and-optimize-model-performance.json | 17 +---------------- ...ation-in-the-browser-pydata-global-2024.json | 17 +---------------- ...with-modelingtoolkit-pydata-global-2024.json | 17 +---------------- ...y-uncertainty-quantification-with-mapie.json | 17 +---------------- ...-python-applications-pydata-global-2024.json | 17 +---------------- ...e-learning-ecosystem-pydata-global-2024.json | 17 +---------------- ...ogramming-in-data-engineering-workflows.json | 17 +---------------- ...-grew-a-scaling-saga-pydata-global-2024.json | 17 +---------------- ...d-coherence-of-open-source-eval-metrics.json | 17 +---------------- ...-ai-agents-with-burr-pydata-global-2024.json | 17 +---------------- ...pey-python-is-a-joke-pydata-global-2024.json | 17 +---------------- ...t-our-optima-combine-pydata-global-2024.json | 17 +---------------- ...ene-for-data-science-pydata-global-2024.json | 17 +---------------- ...-blosc2-and-caterva2-pydata-global-2024.json | 17 +---------------- ...trusive-load-monitoring-for-iot-devices.json | 17 +---------------- ...ale-of-two-languages-pydata-global-2024.json | 17 +---------------- ...fidence-using-duckdb-pydata-global-2024.json | 17 +---------------- ...to-data-science-in-university-education.json | 17 +---------------- ...nce-at-massive-scale-pydata-global-2024.json | 17 +---------------- ...ds-on-with-scalable-serverless-analysis.json | 17 +---------------- ...-s-arc-agi-challenge-pydata-global-2024.json | 17 
+---------------- ...are-not-all-you-need-pydata-global-2024.json | 17 +---------------- ...-python-environments-pydata-global-2024.json | 17 +---------------- ...-library-development-pydata-global-2024.json | 17 +---------------- ...odels-from-references-to-human-judgment.json | 17 +---------------- ...lems-in-r-and-python-pydata-global-2024.json | 17 +---------------- ...-unfair-bias-in-machine-learning-models.json | 17 +---------------- ...micropython-in-1-2-3-pydata-global-2024.json | 17 +---------------- ...ytics-with-faustream-pydata-global-2024.json | 17 +---------------- ...y-tackling-common-data-challenges-in-ml.json | 17 +---------------- ...ment-with-python-101-pydata-global-2024.json | 17 +---------------- ...ne-and-supercharge-your-pydata-workflow.json | 17 +---------------- ...ter-in-our-ai-future-pydata-global-2024.json | 17 +---------------- ...tion-changepoint-detection-segmentation.json | 17 +---------------- ...ection-via-regression-on-shapley-values.json | 17 +---------------- ...prove-agricultural-resilience-in-africa.json | 17 +---------------- ...-on-satellite-images-pydata-global-2024.json | 17 +---------------- ...ith-polars-deltalake-pydata-global-2024.json | 17 +---------------- ...ormat-for-multi-modal-ai-data-pipelines.json | 17 +---------------- ...ing-are-we-there-yet-pydata-global-2024.json | 17 +---------------- ...ade-simple-by-pycafe-pydata-global-2024.json | 17 +---------------- ...e-impact-of-effort-on-business-outcomes.json | 17 +---------------- ...-using-data-prep-kit-pydata-global-2024.json | 17 +---------------- ...end-llm-training-and-inference-pipeline.json | 17 +---------------- ...ed-dataframe-columns-pydata-global-2024.json | 17 +---------------- ...es-foundational-llms-pydata-global-2024.json | 17 +---------------- ...-in-apache-spark-4-0-pydata-global-2024.json | 17 +---------------- ...ight-in-ci-pipelines-pydata-global-2024.json | 17 +---------------- ...s-build-a-core-ml-platform-from-scratch.json | 17 
+---------------- ...isuals-with-plotnine-pydata-global-2024.json | 17 +---------------- ...on-the-factory-floor-pydata-global-2024.json | 17 +---------------- ...-python-and-cesiumjs-pydata-global-2024.json | 17 +---------------- ...a-can-complete-the-future-of-innovation.json | 17 +---------------- ...ements-demo-showcase-pydata-global-2024.json | 17 +---------------- ...en-models-for-graphs-pydata-global-2024.json | 17 +---------------- ...p-for-live-inference-pydata-global-2024.json | 17 +---------------- ...-into-llm-evaluation-pydata-global-2024.json | 17 +---------------- ...-and-vector-search-to-empower-retrieval.json | 17 +---------------- ...-experimental-design-pydata-global-2024.json | 17 +---------------- ...matching-of-large-datasets-using-splink.json | 17 +---------------- ...ng-polars-data-types-pydata-global-2024.json | 17 +---------------- ...chronous-programming-pydata-global-2024.json | 17 +---------------- ...th-open-source-tools-pydata-global-2024.json | 17 +---------------- ...volving-role-of-the-r-development-guide.json | 17 +---------------- ...strategies-for-scientists-and-engineers.json | 17 +---------------- ...ing-pydata-workflows-pydata-global-2024.json | 17 +---------------- ...scientific-community-pydata-global-2024.json | 17 +---------------- ...ployment-with-kitops-pydata-global-2024.json | 17 +---------------- ...ection-in-production-pydata-global-2024.json | 17 +---------------- ...ience-engagement-and-higher-conversions.json | 17 +---------------- ...bust-preference-data-pydata-global-2024.json | 17 +---------------- ...t-sensor-data-in-rag-pydata-global-2024.json | 17 +---------------- ...package-for-efficient-big-data-analysis.json | 17 +---------------- ...-efficiency-personalization-and-insight.json | 17 +---------------- ...-learning-with-river-pydata-global-2024.json | 17 +---------------- ...governance-solutions-pydata-global-2024.json | 17 +---------------- ...r-a-smaller-data-era-pydata-global-2024.json | 17 
+---------------- ...enterprise-documents-pydata-global-2024.json | 17 +---------------- ...o-identify-urgent-messages-in-real-time.json | 17 +---------------- 98 files changed, 98 insertions(+), 1568 deletions(-) diff --git a/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json index f9525743d..b7f202601 100644 --- a/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json +++ b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/qSs5ALVbzTk/maxresdefault.jpg", "title": "Adarsh Namala - Scaling Outside the Warehouse Using DuckDB and Python | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json index 6db0ac731..d63425cc1 100644 --- a/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json +++ b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], 
"thumbnail_url": "https://i.ytimg.com/vi/2UkZVKj6QGY/maxresdefault.jpg", "title": "Aditi Juneja - Understanding API Dispatching in NetworkX | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json index 6021e8812..10aaa13a3 100644 --- a/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json +++ b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/MGRmKlDj9rk/maxresdefault.jpg", "title": "Adriana Stan - Off-the-shelf HuggingFace models for audio deepfake detection | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json b/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json index 69ba56eda..e4f55aba6 100644 --- a/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json +++ b/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - 
"python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/5Nt0p_3zU7g/maxresdefault.jpg", "title": "Ahad Shoaib - Foundational Time Series Models in Practice: The Future of Forecasting, or Just Hype?", "videos": [ diff --git a/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json index 24651e4ea..f4781f3fc 100644 --- a/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json +++ b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/FdOeLY3rGA8/maxresdefault.jpg", "title": "Akshay Ballal & Sonam Pankaj-The Memory Efficient Indexing for Vector Databases | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json index c260dd4a2..9f149c7e6 100644 --- a/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json +++ b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to 
code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/foMbacbuAQk/maxresdefault.jpg", "title": "Allen Downey - Time Series Analysis with StatsModels | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json b/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json index 415870272..8b0e485e4 100644 --- a/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json +++ b/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/94yuQKoDKkE/maxresdefault.jpg", "title": "Alonso Silva - Building Knowledge Graph-Based Agents with Structured Text Generation", "videos": [ diff --git a/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json index 330da35d5..54217cccf 100644 --- a/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json +++ b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - 
"scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/FA1TWdxoyV4/maxresdefault.jpg", "title": "Andrew Weeks - Taking Data Science in industry from zero to production | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json index 8bfc685e3..07803a076 100644 --- a/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json +++ b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Z2uz7kwBli8/maxresdefault.jpg", "title": "Anton Antonov - Quantile Regression Workflows | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json index 24d243b9c..6d360f89f 100644 --- a/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json +++ b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": 
"https://i.ytimg.com/vi/wn1L3hlYfc0/maxresdefault.jpg", "title": "Art Anderson - A Deep Dive into Python-Powered Precision and Scalability | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json index 08c1bc9dc..1036587de 100644 --- a/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json +++ b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/3iQFdcVf9jI/maxresdefault.jpg", "title": "Atin Sanyal- Effective GenAI Evaluations: Mitigate Hallucinations and Ship Fast | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json index 332d2652c..03cd344e8 100644 --- a/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json +++ b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": 
"https://i.ytimg.com/vi/QgQzxcPZWxA/maxresdefault.jpg", "title": "Avik Basu - Reproducible Python projects using Nix | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json index 63a6ce2cd..31cf57afe 100644 --- a/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json +++ b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ajLPA34upQY/maxresdefault.jpg", "title": "Benjamin Vincent - Climbing the causal ladder for fun and profit | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json index 38632d67a..43cb96bb9 100644 --- a/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json +++ b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/fi_S89jgUYU/maxresdefault.jpg", 
"title": "Bill Engels & Chris Fonnesbeck - Making Gaussian Processes Useful | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json b/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json index e250508fa..c785923af 100644 --- a/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json +++ b/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/OitWeFVvShc/maxresdefault.jpg", "title": "Bing Wang - An Evaluation of Open-Source OCR Models for Japanese Medical Documents", "videos": [ diff --git a/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json index f5383d8aa..494340289 100644 --- a/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json +++ b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json @@ -20,22 +20,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/TWxD76J5Uho/maxresdefault.jpg", "title": "Borar, Liu, & Shrivastava - Build Your Own Transformer | PyData Global 2024", "videos": [ 
diff --git a/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json b/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json index 343853392..39f372408 100644 --- a/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json +++ b/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/V7HQCMcaJ8A/maxresdefault.jpg", "title": "Bowne-Anderson, Nichol, & Petraityt\u0117 - Building an AI Travel Agent That Never Hallucinates", "videos": [ diff --git a/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json index 5cd90226a..f240ddd10 100644 --- a/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json +++ b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Pq5VhosMJQE/maxresdefault.jpg", "title": "Brookes & Horne - Dashboards to Aid British Government Decisions (using R) | PyData Global 
2024", "videos": [ diff --git a/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json index 432077fa3..4fc43d600 100644 --- a/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json +++ b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/C7_nw2Rebfs/maxresdefault.jpg", "title": "Brunelle & Kornacker - Hands-on Multimodal AI Development with Pixeltable | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json b/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json index ad8e6af30..8f75642de 100644 --- a/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json +++ b/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/pzSzhn9H6X4/maxresdefault.jpg", "title": "Cain\u00e3 Max 
Couto da Silva - PyTorch Workflow Mastery: A Guide to Track and Optimize Model Performance", "videos": [ diff --git a/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json index 716f72bcd..244406f1f 100644 --- a/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json +++ b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/J2XOSdDWPIo/maxresdefault.jpg", "title": "Chris Laffra - PyScript - Writing a Python application in the browser | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json index 87ee8e7a9..cf0be8b69 100644 --- a/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json +++ b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": 
"https://i.ytimg.com/vi/yW4oU-7_tGE/maxresdefault.jpg", "title": "Chris Rackauckas - Open Source Component-Based Modeling with ModelingToolkit | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json b/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json index 0cf7603d1..ace810dc4 100644 --- a/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json +++ b/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ZkLIWS9dlZI/maxresdefault.jpg", "title": "Cordier, Jawad, & Laurent - Boosting AI Reliability: Uncertainty Quantification with MAPIE", "videos": [ diff --git a/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json index 48e061fd8..6f29c2363 100644 --- a/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json +++ b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": 
"https://i.ytimg.com/vi/2Cst7_s_4H8/maxresdefault.jpg", "title": "Daniel Chen - Tips to Level-Up Your Shiny for Python Applications | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json index 47666e2d4..f3cb00269 100644 --- a/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json +++ b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/17Jm_Gqv3K8/maxresdefault.jpg", "title": "Daniel Molina - Discover the Julia Machine Learning Ecosystem | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json b/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json index 7f463a8ae..f873af889 100644 --- a/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json +++ b/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": 
"https://i.ytimg.com/vi/7xrDlgaz-QM/maxresdefault.jpg", "title": "Daphn\u00e9 Grasselly - Enabling Multi-Language Programming in Data Engineering Workflows", "videos": [ diff --git a/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json index fed617708..b367561a7 100644 --- a/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json +++ b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/xDcGtPgxXEk/maxresdefault.jpg", "title": "Duarte Carmo - Panel: The Dashboard That Grew - A Scaling Saga | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json b/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json index 7970c3e2e..11fc64ab1 100644 --- a/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json +++ b/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": 
"https://i.ytimg.com/vi/hCCJoJ5URD0/maxresdefault.jpg", "title": "El Mawass & Neeman - Evaluating RAGs: On the correctness and coherence of Open Source eval metrics", "videos": [ diff --git a/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json index e2f0d7c35..5a7d3e2a2 100644 --- a/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json +++ b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/3Ks02G18anA/maxresdefault.jpg", "title": "Elijah ben Izzy - Build Production Ready AI Agents with Burr | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json index b921ddec6..0d0f222d8 100644 --- a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json +++ b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/iJp12vplXAc/maxresdefault.jpg", "title": "Evan Wimpey - Python is a Joke! 
| PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json index fa7e22833..d478f8524 100644 --- a/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json +++ b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/bl13uhchJVA/maxresdefault.jpg", "title": "Eyal Gruss - Let our optima combine! | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json index cdc60cffe..fe4b89c0a 100644 --- a/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json +++ b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json @@ -20,22 +20,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/POMePoP8M-w/maxresdefault.jpg", "title": "Eyal Kazin - \ud83e\udde0\ud83e\uddf9 Causality - Mental Hygiene for Data Science | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json 
b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json index 184ff85f6..7b4b2b346 100644 --- a/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json +++ b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json @@ -20,22 +20,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/aR-i_a3nGx0/maxresdefault.jpg", "title": "Francesc Alted - Mastering Large NDArray Handling with Blosc2 and Caterva2 | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json b/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json index fe53e5f83..07e25e11a 100644 --- a/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json +++ b/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/cMtYqUqdzsA/maxresdefault.jpg", "title": "Francesco Conti - Deep Learning in Energy Management: Non-Intrusive Load Monitoring for IoT Devices", "videos": [ diff --git 
a/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json index 9bc28e316..83b315467 100644 --- a/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json +++ b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/4sF-Wm8w31c/maxresdefault.jpg", "title": "Guillaume Dalle - Automatic differentiation, a tale of two languages | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json index 7429ee669..55c95c46a 100644 --- a/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json +++ b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/7UqLMHloTsQ/maxresdefault.jpg", "title": "Hannes M\u00fchleisen - Changing Data With Confidence using DuckDB | PyData Global 2024", "videos": [ diff --git 
a/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json b/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json index 5bf49b48a..46efc5a83 100644 --- a/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json +++ b/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/r3Diqvfy4Fo/maxresdefault.jpg", "title": "Hansila Sudasinghe - PYDATA Bloom Framework: An Approach to Data Science in University Education", "videos": [ diff --git a/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json index 85d287d6d..1a815539d 100644 --- a/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json +++ b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/KJxJRx7KQtc/maxresdefault.jpg", "title": "Hendrik Makait - Dask \u2764\ufe0f Xarray: Geoscience at Massive Scale | PyData Global 2024", "videos": [ diff --git 
a/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json index 0e300b288..4d3dbaad4 100644 --- a/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json +++ b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2D5-7zIeOQ4/maxresdefault.jpg", "title": "Hsia, Swena & Williams- Python + BigQuery + DataFrames: Hands on with scalable \"serverless\" analysis", "videos": [ diff --git a/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json index b4d2b5b76..f9d8a3f1a 100644 --- a/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json +++ b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ft_PYi8A93M/maxresdefault.jpg", "title": "Ian Ozsvald - Valuable LLM lessons learnt on Kaggle's ARC AGI Challenge 
| PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json index 9791b8e04..1ed8ffea7 100644 --- a/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json +++ b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/o3dBxo6fgcA/maxresdefault.jpg", "title": "Irina Vidal Migall\u00f3n - Trustworthy LLMs: Vibe checks are not all you need | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json b/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json index ea354eb3d..ec700f77d 100644 --- a/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json +++ b/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ghq-VDNvNss/maxresdefault.jpg", 
"title": "Jacob Tomlinson & Melody Wang- The art of wrangling your GPU Python environments |PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json index 826f8185b..38f30daff 100644 --- a/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json +++ b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/LluyXFj9YDI/maxresdefault.jpg", "title": "Jeff Bezanson - Statically-Compiled Julia for Library Development | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json b/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json index 76e7381a7..41f409496 100644 --- a/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json +++ b/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": 
"https://i.ytimg.com/vi/ObFGLVEPYoc/maxresdefault.jpg", "title": "Jhaveri & Joshi - Holistic Evaluation of Large Language Models: From References to Human Judgment", "videos": [ diff --git a/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json index 163cc77a8..762a0146e 100644 --- a/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json +++ b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/7H44aJuK0Yg/maxresdefault.jpg", "title": "John Mount - Solving Forecasting Problems in R and Python | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json b/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json index ec104ed37..d6810aceb 100644 --- a/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json +++ b/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": 
"https://i.ytimg.com/vi/G1I45toaVSs/maxresdefault.jpg", "title": "John Sandall - Fairness Tales: How To Measure And Mitigate Unfair Bias in Machine Learning Models", "videos": [ diff --git a/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json index 44465c30f..30a73f0b6 100644 --- a/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json +++ b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/nCmBJJHGQKo/maxresdefault.jpg", "title": "Jon Nordby - Microcontrollers + Machine Learning with MicroPython in 1-2-3 | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json index 5b29d9a47..6c5c54485 100644 --- a/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json +++ b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 
3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/exHRTSGZtAo/maxresdefault.jpg", "title": "Joseph Oladokun-Bridging the Gap: Real-Time Predictive Analytics with Faustream | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json b/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json index 388d39a00..a9cce1961 100644 --- a/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json +++ b/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2ugMOAYwLpQ/maxresdefault.jpg", "title": "Kalyan Prasad - The Hidden Costs of Data Quality - Tackling Common Data Challenges in ML", "videos": [ diff --git a/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json index e4cdd2c59..50324e41f 100644 --- a/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json +++ b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - 
"scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/rfXgtUYF3lw/maxresdefault.jpg", "title": "Katrina Riehl & Jacob Tomlinson - GPU development with Python 101 | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json b/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json index 385a12988..3cfc6e353 100644 --- a/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json +++ b/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json @@ -20,22 +20,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/siPGvvrfylQ/maxresdefault.jpg", "title": "KEYNOTE: Dr. 
Jeroen Janssens - Embrace the Unix Command Line and Supercharge Your PyData Workflow", "videos": [ diff --git a/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json index 189c31b24..da86bb47c 100644 --- a/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json +++ b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/3hSjftUjmWk/maxresdefault.jpg", "title": "KEYNOTE: Peter Wang - Do Python and Data Science Matter in Our AI Future? 
| PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json b/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json index 98f37b608..77a53ad36 100644 --- a/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json +++ b/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/VwhevNkxjYw/maxresdefault.jpg", "title": "Kiraly, Risi, & Tveten - sktime: time series anomaly detection, changepoint detection, segmentation", "videos": [ diff --git a/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json b/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json index 10bf08203..0010c5e62 100644 --- a/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json +++ b/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/pmqvyrIyB_8/maxresdefault.jpg", "title": "Koseoglu & Kraev - Fast, intuitive feature selection via 
regression on Shapley values", "videos": [ diff --git a/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json b/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json index a3cb09678..e894ba3e8 100644 --- a/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json +++ b/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/wMwEmlhyYh0/maxresdefault.jpg", "title": "Kristal Joi Wise - Harnessing Machine Learning to Improve Agricultural Resilience in Africa", "videos": [ diff --git a/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json index 3f415e70b..6bb723060 100644 --- a/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json +++ b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/1uex29BVbgg/maxresdefault.jpg", "title": 
"Leonie Hodel - Using AI to Spot Deforestation-related Cows on Satellite Images | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json index bc20ae1a5..48a5206db 100644 --- a/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json +++ b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ZIrq9GsN2HM/maxresdefault.jpg", "title": "Liam Brannigan - Build simple & scalable data pipelines with Polars & DeltaLake | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json b/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json index 267c4fa31..d8c59e3d3 100644 --- a/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json +++ b/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], 
"thumbnail_url": "https://i.ytimg.com/vi/noZNcpYRrkk/maxresdefault.jpg", "title": "Lu Qiu & Allison Wang - Empowering PySpark with Lance Format for Multi-Modal AI Data Pipelines", "videos": [ diff --git a/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json index b40c4c4b5..313789600 100644 --- a/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json +++ b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/DZICL_8vdXI/maxresdefault.jpg", "title": "Luca Baggi - Foundational Models for Time Series Forecasting: are we there yet? 
| PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json index 60cd2cc37..68155f193 100644 --- a/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json +++ b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/-adJy4MxZgE/maxresdefault.jpg", "title": "Maarten Breddels - Python Apps in the Browser made simple by PyCafe | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json b/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json index bd9ec9345..0d8407852 100644 --- a/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json +++ b/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2-0iWgVC2oc/maxresdefault.jpg", "title": "Maggie Wolff - Measuring the User Experience and the Impact 
of Effort on Business Outcomes", "videos": [ diff --git a/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json index 22eb9afaa..5ef20f203 100644 --- a/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json +++ b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json @@ -20,22 +20,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/GVA1XK0jrf8/maxresdefault.jpg", "title": "Maniyam & Nielsen - Preparing Data for LLM Applications Using Data Prep Kit | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json b/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json index 67c41412d..fb0120232 100644 --- a/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json +++ b/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/V2L6hufE2X4/maxresdefault.jpg", "title": "Mark Moyou, PhD - Understanding the end-to-end LLM 
training and inference pipeline", "videos": [ diff --git a/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json index 298a91ed3..ee9dc440e 100644 --- a/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json +++ b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/thfNEGCuwbY/maxresdefault.jpg", "title": "Martin Durant- akimbo: vectorized processing of nested/ragged dataframe columns | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json index 9ebeecc0e..35274daa3 100644 --- a/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json +++ b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/uV3HLROlcLM/maxresdefault.jpg", "title": "Marysia Winkels - The Data That Shapes 
Foundational LLMs | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json index b023975c7..0e84d904e 100644 --- a/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json +++ b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/zBy3-NiylY8/maxresdefault.jpg", "title": "Matthew Powers - New Features in Apache Spark 4.0 | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json index c62b8d21c..3a5c5b019 100644 --- a/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json +++ b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/fasxVtDQgK0/maxresdefault.jpg", "title": "Michael Sarahan - Going Plaid: Striving for Speed of Light in CI pipelines | PyData Global 2024", "videos": [ diff --git 
a/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json b/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json index 9a4b76351..052f02eb5 100644 --- a/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json +++ b/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/XkNeCavaJtw/maxresdefault.jpg", "title": "Nathan Colbert - From Inference to Features: Build a Core ML Platform from Scratch", "videos": [ diff --git a/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json index 9e6fe8fed..d30a3abee 100644 --- a/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json +++ b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/NBGJuaBF2rc/maxresdefault.jpg", "title": "Nicola Rennie - Practical Techniques for Polished Visuals with Plotnine | PyData Global 2024", "videos": [ diff --git 
a/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json index ae6f52217..7e0475580 100644 --- a/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json +++ b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/G8ypUIlvlEg/maxresdefault.jpg", "title": "Nicol\u00f2 Giso - Image Recognition for safety on the factory floor | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json index 4301647e1..1491a6432 100644 --- a/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json +++ b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/-6vh5vMgPHA/maxresdefault.jpg", "title": "Nompumelelo Mtsweni- 3D geospatial data visualization using Python and Cesiumjs | 
PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json b/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json index 3c3134749..02a5ccf3c 100644 --- a/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json +++ b/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2k72xRc67wE/maxresdefault.jpg", "title": "Noor Aftab - The Missing 78%: How Women in AI & Data Can Complete the Future of Innovation", "videos": [ diff --git a/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json b/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json index 507602fc0..0674e43d0 100644 --- a/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json +++ b/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/UPSGvCtKYGE/maxresdefault.jpg", "title": "NumHack 2024: Winners Announcements & Demo Showcase | PyData Global 2024", "videos": [ diff --git 
a/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json index 674f971c4..afe0f5518 100644 --- a/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json +++ b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Nrsh6LzUk6A/maxresdefault.jpg", "title": "Paco Nathan - Catching Bad Guys using open data and open models for graphs | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json index 3585deec0..dc351b969 100644 --- a/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json +++ b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/f5G8OVuRI3k/maxresdefault.jpg", "title": "Pascal Tomecek - Leveraging CSP for Live Inference | PyData Global 2024", "videos": [ diff --git 
a/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json index 65f8ba627..8c2279e9e 100644 --- a/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json +++ b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/vxSRIL1WD9g/maxresdefault.jpg", "title": "Patrick Deziel - Putting the data science back into LLM evaluation | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json b/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json index 23b626a4e..13be54023 100644 --- a/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json +++ b/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ky2yufsffas/maxresdefault.jpg", "title": "Prashanth Rao - Graph RAG: Bringing together graph and vector search to empower retrieval", "videos": [ diff --git 
a/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json index 3f61dc751..c165f01e0 100644 --- a/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json +++ b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/zssnoI2JvTo/maxresdefault.jpg", "title": "Quan Nguyen - Cost-effective data annotation with Bayesian experimental design | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json b/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json index a250c5d0f..745da91e6 100644 --- a/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json +++ b/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json @@ -20,22 +20,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/eQtFkI8f02U/maxresdefault.jpg", "title": "Robin Linacre - Rapid deduplication and fuzzy matching of large 
datasets using Splink", "videos": [ diff --git a/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json index b29bf6c8e..8bc6799f4 100644 --- a/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json +++ b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json @@ -20,22 +20,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/8HwfVVknhP4/maxresdefault.jpg", "title": "Rodrigo Gir\u00e3o Serr\u00e3o - Understanding Polars data types | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json index 4795695da..31a389754 100644 --- a/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json +++ b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/oy7sEAfJsWw/maxresdefault.jpg", "title": "Ryan Varley - Let's get you started with asynchronous programming | PyData Global 2024", "videos": [ diff --git 
a/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json index 184e8b89a..5c4587462 100644 --- a/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json +++ b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Td5dFdG0wE4/maxresdefault.jpg", "title": "Sara Zanzottera - Building LLM Voice Bots with Open Source Tools | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json b/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json index 38482438d..463a09193 100644 --- a/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json +++ b/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json @@ -20,22 +20,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/l5BwwvQlwG0/maxresdefault.jpg", "title": "Saranjeet Kaur Bhogal - Empowering New Contributors: The Evolving Role of the R Development 
Guide", "videos": [ diff --git a/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json b/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json index 660613bce..50513bf07 100644 --- a/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json +++ b/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/YYZ6vcumojo/maxresdefault.jpg", "title": "Saurabh Garg - Navigating Cloud Expenses in Data & AI: Strategies for Scientists and Engineers", "videos": [ diff --git a/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json index 4a170421b..c769a8bb4 100644 --- a/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json +++ b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/KdgegsH3rAQ/maxresdefault.jpg", "title": "Sayantika Banik - The LEGO Approach to designing PyData Workflows | 
PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json index 190bf0eb8..8b1f72e3c 100644 --- a/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json +++ b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ABao7JTDTMI/maxresdefault.jpg", "title": "Sergey Maydanov - Bringing NVIDIA math libraries to Python scientific community | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json index ccb6608ec..6c655efea 100644 --- a/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json +++ b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/eExvPkSqTCQ/maxresdefault.jpg", "title": "Shivay 
Lamba - Streamlining AI development and Deployment with KitOps | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json index 5e56577eb..e05c89808 100644 --- a/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json +++ b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ca4w2ZIZ0S0/maxresdefault.jpg", "title": "Shreya Khurana - Realtime Time Series Anomaly Detection in Production | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json b/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json index 30d3003e4..e03152ea5 100644 --- a/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json +++ b/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": 
"https://i.ytimg.com/vi/d1SaxtX7L6E/maxresdefault.jpg", "title": "Shrikanth Singh - Automating SEA Retargeting for Smarter Audience Engagement and Higher Conversions", "videos": [ diff --git a/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json index 0bfa5eafd..86fabcbdf 100644 --- a/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json +++ b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/E01yrxkmWvM/maxresdefault.jpg", "title": "Son The Nguyen- Improve LLMs Alignment with Complete and Robust Preference Data | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json index c22bb64e6..ef9a0aa5c 100644 --- a/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json +++ b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], 
+ "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/IJuzKZdiLCg/maxresdefault.jpg", "title": "Timothy Spann - It's in the Air Tonight. Sensor Data in RAG | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json b/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json index 2f06c302b..fd61b7642 100644 --- a/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json +++ b/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json @@ -20,22 +20,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/l_7FXnppu-g/maxresdefault.jpg", "title": "Toby Dylan Hocking- Using and contributing to the data.table package for efficient big data analysis", "videos": [ diff --git a/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json b/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json index c711c1545..a11011158 100644 --- a/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json +++ b/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - 
"scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/I7jVu-fHavI/maxresdefault.jpg", "title": "Tony Ojeda - Generative AI + Python: Unlocking Efficiency, Personalization, and Insight", "videos": [ diff --git a/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json index 257a546ce..013af1443 100644 --- a/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json +++ b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/GhDRKUT9gZA/maxresdefault.jpg", "title": "Tun Shwe - Moving from Offline to Online Machine Learning with River | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json index 7e0d3ea20..2698a2ab1 100644 --- a/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json +++ b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - 
"learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/__VU52cv6jk/maxresdefault.jpg", "title": "Vyoma Gajjar- LLMs in Regulated Industries: Challenges and Governance Solutions | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json index 0b8d3a97f..933e20269 100644 --- a/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json +++ b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/w4aYrav8-zE/maxresdefault.jpg", "title": "Wes McKinney - Retooling for a Smaller Data Era | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json index 63c027b62..aa48ec2e9 100644 --- a/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json +++ b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], 
"thumbnail_url": "https://i.ytimg.com/vi/pnacsAWnjV8/maxresdefault.jpg", "title": "Zain Hasan - ColPali\u2019s Vision-Powered RAG for Enterprise Documents | PyData Global 2024", "videos": [ diff --git a/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json b/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json index 0af722340..2589489f7 100644 --- a/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json +++ b/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json @@ -16,22 +16,7 @@ "speakers": [ "TODO" ], - "tags": [ - "Education", - "Julia", - "NumFOCUS", - "Opensource", - "PyData", - "Python", - "Tutorial", - "coding", - "how to program", - "learn", - "learn to code", - "python 3", - "scientific programming", - "software" - ], + "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Lb0ecRiz4xE/maxresdefault.jpg", "title": "Zhen (Tony) Zhao - Training Language Models to Identify Urgent Messages in Real-Time", "videos": [ From 27f884855f9886d4504e93458062d728ae7333cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?= <14986783+ELC@users.noreply.github.com> Date: Fri, 27 Jun 2025 01:53:48 +0000 Subject: [PATCH 3/7] Remove suffixes --- ...he-warehouse-using-duckdb-and-python-pydata-global-2024.json | 2 +- ...standing-api-dispatching-in-networkx-pydata-global-2024.json | 2 +- ...-models-for-audio-deepfake-detection-pydata-global-2024.json | 2 +- ...icient-indexing-for-vector-databases-pydata-global-2024.json | 2 +- ...ime-series-analysis-with-statsmodels-pydata-global-2024.json | 2 +- ...-in-industry-from-zero-to-production-pydata-global-2024.json | 2 +- ...ntonov-quantile-regression-workflows-pydata-global-2024.json | 2 +- ...on-powered-precision-and-scalability-pydata-global-2024.json | 2 +- 
...itigate-hallucinations-and-ship-fast-pydata-global-2024.json | 2 +- ...producible-python-projects-using-nix-pydata-global-2024.json | 2 +- ...the-causal-ladder-for-fun-and-profit-pydata-global-2024.json | 2 +- ...eck-making-gaussian-processes-useful-pydata-global-2024.json | 2 +- ...rivastava-build-your-own-transformer-pydata-global-2024.json | 2 +- ...british-government-decisions-using-r-pydata-global-2024.json | 2 +- ...modal-ai-development-with-pixeltable-pydata-global-2024.json | 2 +- ...-a-python-application-in-the-browser-pydata-global-2024.json | 2 +- ...-based-modeling-with-modelingtoolkit-pydata-global-2024.json | 2 +- ...p-your-shiny-for-python-applications-pydata-global-2024.json | 2 +- ...the-julia-machine-learning-ecosystem-pydata-global-2024.json | 2 +- ...e-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json | 2 +- ...production-ready-ai-agents-with-burr-pydata-global-2024.json | 2 +- .../videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json | 2 +- .../eyal-gruss-let-our-optima-combine-pydata-global-2024.json | 2 +- ...lity-mental-hygiene-for-data-science-pydata-global-2024.json | 2 +- ...ay-handling-with-blosc2-and-caterva2-pydata-global-2024.json | 2 +- ...ferentiation-a-tale-of-two-languages-pydata-global-2024.json | 2 +- ...ng-data-with-confidence-using-duckdb-pydata-global-2024.json | 2 +- ...k-xarray-geoscience-at-massive-scale-pydata-global-2024.json | 2 +- ...learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json | 2 +- ...lms-vibe-checks-are-not-all-you-need-pydata-global-2024.json | 2 +- ...mpiled-julia-for-library-development-pydata-global-2024.json | 2 +- ...forecasting-problems-in-r-and-python-pydata-global-2024.json | 2 +- ...e-learning-with-micropython-in-1-2-3-pydata-global-2024.json | 2 +- ...-predictive-analytics-with-faustream-pydata-global-2024.json | 2 +- ...nson-gpu-development-with-python-101-pydata-global-2024.json | 2 +- ...data-science-matter-in-our-ai-future-pydata-global-2024.json | 2 +- 
...ion-related-cows-on-satellite-images-pydata-global-2024.json | 2 +- ...data-pipelines-with-polars-deltalake-pydata-global-2024.json | 2 +- ...-series-forecasting-are-we-there-yet-pydata-global-2024.json | 2 +- ...in-the-browser-made-simple-by-pycafe-pydata-global-2024.json | 2 +- ...llm-applications-using-data-prep-kit-pydata-global-2024.json | 2 +- ...g-of-nested-ragged-dataframe-columns-pydata-global-2024.json | 2 +- ...e-data-that-shapes-foundational-llms-pydata-global-2024.json | 2 +- ...ers-new-features-in-apache-spark-4-0-pydata-global-2024.json | 2 +- ...g-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json | 2 +- ...s-for-polished-visuals-with-plotnine-pydata-global-2024.json | 2 +- ...tion-for-safety-on-the-factory-floor-pydata-global-2024.json | 2 +- ...ualization-using-python-and-cesiumjs-pydata-global-2024.json | 2 +- ...-winners-announcements-demo-showcase-pydata-global-2024.json | 2 +- ...open-data-and-open-models-for-graphs-pydata-global-2024.json | 2 +- ...ek-leveraging-csp-for-live-inference-pydata-global-2024.json | 2 +- ...ata-science-back-into-llm-evaluation-pydata-global-2024.json | 2 +- ...on-with-bayesian-experimental-design-pydata-global-2024.json | 2 +- ...rrao-understanding-polars-data-types-pydata-global-2024.json | 2 +- ...tarted-with-asynchronous-programming-pydata-global-2024.json | 2 +- ...lm-voice-bots-with-open-source-tools-pydata-global-2024.json | 2 +- ...proach-to-designing-pydata-workflows-pydata-global-2024.json | 2 +- ...aries-to-python-scientific-community-pydata-global-2024.json | 2 +- ...velopment-and-deployment-with-kitops-pydata-global-2024.json | 2 +- ...ries-anomaly-detection-in-production-pydata-global-2024.json | 2 +- ...-complete-and-robust-preference-data-pydata-global-2024.json | 2 +- ...n-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json | 2 +- ...o-online-machine-learning-with-river-pydata-global-2024.json | 2 +- ...-challenges-and-governance-solutions-pydata-global-2024.json | 2 +- 
...ney-retooling-for-a-smaller-data-era-pydata-global-2024.json | 2 +- ...powered-rag-for-enterprise-documents-pydata-global-2024.json | 2 +- 66 files changed, 66 insertions(+), 66 deletions(-) diff --git a/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json index b7f202601..1c720ff0c 100644 --- a/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json +++ b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/qSs5ALVbzTk/maxresdefault.jpg", - "title": "Adarsh Namala - Scaling Outside the Warehouse Using DuckDB and Python | PyData Global 2024", + "title": "Adarsh Namala - Scaling Outside the Warehouse Using DuckDB and Python", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json index d63425cc1..0357f5170 100644 --- a/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json +++ b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2UkZVKj6QGY/maxresdefault.jpg", - "title": "Aditi Juneja - Understanding API Dispatching in NetworkX | PyData Global 2024", + "title": "Aditi Juneja - Understanding API Dispatching in NetworkX", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json 
b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json index 10aaa13a3..7b55cf96d 100644 --- a/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json +++ b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/MGRmKlDj9rk/maxresdefault.jpg", - "title": "Adriana Stan - Off-the-shelf HuggingFace models for audio deepfake detection | PyData Global 2024", + "title": "Adriana Stan - Off-the-shelf HuggingFace models for audio deepfake detection", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json index f4781f3fc..019a894ad 100644 --- a/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json +++ b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/FdOeLY3rGA8/maxresdefault.jpg", - "title": "Akshay Ballal & Sonam Pankaj-The Memory Efficient Indexing for Vector Databases | PyData Global 2024", + "title": "Akshay Ballal & Sonam Pankaj-The Memory Efficient Indexing for Vector Databases", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json index 9f149c7e6..d0c926509 100644 --- 
a/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json +++ b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/foMbacbuAQk/maxresdefault.jpg", - "title": "Allen Downey - Time Series Analysis with StatsModels | PyData Global 2024", + "title": "Allen Downey - Time Series Analysis with StatsModels", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json index 54217cccf..178a5717d 100644 --- a/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json +++ b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/FA1TWdxoyV4/maxresdefault.jpg", - "title": "Andrew Weeks - Taking Data Science in industry from zero to production | PyData Global 2024", + "title": "Andrew Weeks - Taking Data Science in industry from zero to production", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json index 07803a076..a8fcb287d 100644 --- a/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json +++ b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Z2uz7kwBli8/maxresdefault.jpg", - "title": "Anton Antonov - Quantile Regression Workflows | PyData Global 2024", + 
"title": "Anton Antonov - Quantile Regression Workflows", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json index 6d360f89f..b07318144 100644 --- a/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json +++ b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/wn1L3hlYfc0/maxresdefault.jpg", - "title": "Art Anderson - A Deep Dive into Python-Powered Precision and Scalability | PyData Global 2024", + "title": "Art Anderson - A Deep Dive into Python-Powered Precision and Scalability", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json index 1036587de..de6e441fb 100644 --- a/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json +++ b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/3iQFdcVf9jI/maxresdefault.jpg", - "title": "Atin Sanyal- Effective GenAI Evaluations: Mitigate Hallucinations and Ship Fast | PyData Global 2024", + "title": "Atin Sanyal- Effective GenAI Evaluations: Mitigate Hallucinations and Ship Fast", "videos": [ { "type": "youtube", diff --git 
a/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json index 03cd344e8..cac57989e 100644 --- a/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json +++ b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/QgQzxcPZWxA/maxresdefault.jpg", - "title": "Avik Basu - Reproducible Python projects using Nix | PyData Global 2024", + "title": "Avik Basu - Reproducible Python projects using Nix", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json index 31cf57afe..151ff6fda 100644 --- a/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json +++ b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ajLPA34upQY/maxresdefault.jpg", - "title": "Benjamin Vincent - Climbing the causal ladder for fun and profit | PyData Global 2024", + "title": "Benjamin Vincent - Climbing the causal ladder for fun and profit", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json index 43cb96bb9..667e24268 100644 --- a/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json +++ 
b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/fi_S89jgUYU/maxresdefault.jpg", - "title": "Bill Engels & Chris Fonnesbeck - Making Gaussian Processes Useful | PyData Global 2024", + "title": "Bill Engels & Chris Fonnesbeck - Making Gaussian Processes Useful", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json index 494340289..082d4efe3 100644 --- a/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json +++ b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json @@ -22,7 +22,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/TWxD76J5Uho/maxresdefault.jpg", - "title": "Borar, Liu, & Shrivastava - Build Your Own Transformer | PyData Global 2024", + "title": "Borar, Liu, & Shrivastava - Build Your Own Transformer", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json index f240ddd10..0d9a27f0b 100644 --- a/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json +++ b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Pq5VhosMJQE/maxresdefault.jpg", - "title": "Brookes & Horne - Dashboards to Aid British Government Decisions (using R) | PyData Global 2024", + "title": "Brookes & Horne - Dashboards to Aid British 
Government Decisions (using R)", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json index 4fc43d600..3e4836d93 100644 --- a/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json +++ b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/C7_nw2Rebfs/maxresdefault.jpg", - "title": "Brunelle & Kornacker - Hands-on Multimodal AI Development with Pixeltable | PyData Global 2024", + "title": "Brunelle & Kornacker - Hands-on Multimodal AI Development with Pixeltable", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json index 244406f1f..b51de941b 100644 --- a/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json +++ b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/J2XOSdDWPIo/maxresdefault.jpg", - "title": "Chris Laffra - PyScript - Writing a Python application in the browser | PyData Global 2024", + "title": "Chris Laffra - PyScript - Writing a Python application in the browser", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json 
b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json index cf0be8b69..598add43d 100644 --- a/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json +++ b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/yW4oU-7_tGE/maxresdefault.jpg", - "title": "Chris Rackauckas - Open Source Component-Based Modeling with ModelingToolkit | PyData Global 2024", + "title": "Chris Rackauckas - Open Source Component-Based Modeling with ModelingToolkit", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json index 6f29c2363..8a0ed42b8 100644 --- a/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json +++ b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2Cst7_s_4H8/maxresdefault.jpg", - "title": "Daniel Chen - Tips to Level-Up Your Shiny for Python Applications | PyData Global 2024", + "title": "Daniel Chen - Tips to Level-Up Your Shiny for Python Applications", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json index f3cb00269..5dbf9425e 100644 --- a/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json +++ 
b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/17Jm_Gqv3K8/maxresdefault.jpg", - "title": "Daniel Molina - Discover the Julia Machine Learning Ecosystem | PyData Global 2024", + "title": "Daniel Molina - Discover the Julia Machine Learning Ecosystem", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json index b367561a7..db0515b2d 100644 --- a/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json +++ b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/xDcGtPgxXEk/maxresdefault.jpg", - "title": "Duarte Carmo - Panel: The Dashboard That Grew - A Scaling Saga | PyData Global 2024", + "title": "Duarte Carmo - Panel: The Dashboard That Grew - A Scaling Saga", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json index 5a7d3e2a2..ff6428961 100644 --- a/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json +++ b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/3Ks02G18anA/maxresdefault.jpg", - "title": "Elijah ben Izzy - Build Production Ready AI Agents with Burr | PyData Global 2024", + "title": "Elijah ben Izzy - Build Production Ready AI Agents with 
Burr", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json index 0d0f222d8..97256a178 100644 --- a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json +++ b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/iJp12vplXAc/maxresdefault.jpg", - "title": "Evan Wimpey - Python is a Joke! | PyData Global 2024", + "title": "Evan Wimpey - Python is a Joke!", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json index d478f8524..e5618d622 100644 --- a/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json +++ b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/bl13uhchJVA/maxresdefault.jpg", - "title": "Eyal Gruss - Let our optima combine! 
| PyData Global 2024", + "title": "Eyal Gruss - Let our optima combine!", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json index fe4b89c0a..a04bf05a3 100644 --- a/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json +++ b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json @@ -22,7 +22,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/POMePoP8M-w/maxresdefault.jpg", - "title": "Eyal Kazin - \ud83e\udde0\ud83e\uddf9 Causality - Mental Hygiene for Data Science | PyData Global 2024", + "title": "Eyal Kazin - \ud83e\udde0\ud83e\uddf9 Causality - Mental Hygiene for Data Science", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json index 7b4b2b346..afd7ce3b0 100644 --- a/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json +++ b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json @@ -22,7 +22,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/aR-i_a3nGx0/maxresdefault.jpg", - "title": "Francesc Alted - Mastering Large NDArray Handling with Blosc2 and Caterva2 | PyData Global 2024", + "title": "Francesc Alted - Mastering Large NDArray Handling with Blosc2 and Caterva2", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json 
b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json index 83b315467..f15628fbc 100644 --- a/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json +++ b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/4sF-Wm8w31c/maxresdefault.jpg", - "title": "Guillaume Dalle - Automatic differentiation, a tale of two languages | PyData Global 2024", + "title": "Guillaume Dalle - Automatic differentiation, a tale of two languages", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json index 55c95c46a..ab34afdf7 100644 --- a/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json +++ b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/7UqLMHloTsQ/maxresdefault.jpg", - "title": "Hannes M\u00fchleisen - Changing Data With Confidence using DuckDB | PyData Global 2024", + "title": "Hannes M\u00fchleisen - Changing Data With Confidence using DuckDB", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json index 1a815539d..aaffff3b3 100644 --- a/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json +++ 
b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/KJxJRx7KQtc/maxresdefault.jpg", - "title": "Hendrik Makait - Dask \u2764\ufe0f Xarray: Geoscience at Massive Scale | PyData Global 2024", + "title": "Hendrik Makait - Dask \u2764\ufe0f Xarray: Geoscience at Massive Scale", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json index f9d8a3f1a..edfb2266f 100644 --- a/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json +++ b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ft_PYi8A93M/maxresdefault.jpg", - "title": "Ian Ozsvald - Valuable LLM lessons learnt on Kaggle's ARC AGI Challenge | PyData Global 2024", + "title": "Ian Ozsvald - Valuable LLM lessons learnt on Kaggle's ARC AGI Challenge", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json index 1ed8ffea7..1794907c7 100644 --- a/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json +++ b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/o3dBxo6fgcA/maxresdefault.jpg", - "title": "Irina Vidal Migall\u00f3n 
- Trustworthy LLMs: Vibe checks are not all you need | PyData Global 2024", + "title": "Irina Vidal Migall\u00f3n - Trustworthy LLMs: Vibe checks are not all you need", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json index 38f30daff..e7e98b74a 100644 --- a/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json +++ b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/LluyXFj9YDI/maxresdefault.jpg", - "title": "Jeff Bezanson - Statically-Compiled Julia for Library Development | PyData Global 2024", + "title": "Jeff Bezanson - Statically-Compiled Julia for Library Development", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json index 762a0146e..d8e6e2b2e 100644 --- a/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json +++ b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/7H44aJuK0Yg/maxresdefault.jpg", - "title": "John Mount - Solving Forecasting Problems in R and Python | PyData Global 2024", + "title": "John Mount - Solving Forecasting Problems in R and Python", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json 
b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json index 30a73f0b6..d7aa65920 100644 --- a/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json +++ b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/nCmBJJHGQKo/maxresdefault.jpg", - "title": "Jon Nordby - Microcontrollers + Machine Learning with MicroPython in 1-2-3 | PyData Global 2024", + "title": "Jon Nordby - Microcontrollers + Machine Learning with MicroPython in 1-2-3", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json index 6c5c54485..fb93f304f 100644 --- a/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json +++ b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/exHRTSGZtAo/maxresdefault.jpg", - "title": "Joseph Oladokun-Bridging the Gap: Real-Time Predictive Analytics with Faustream | PyData Global 2024", + "title": "Joseph Oladokun-Bridging the Gap: Real-Time Predictive Analytics with Faustream", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json index 50324e41f..58de09927 100644 --- 
a/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json +++ b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/rfXgtUYF3lw/maxresdefault.jpg", - "title": "Katrina Riehl & Jacob Tomlinson - GPU development with Python 101 | PyData Global 2024", + "title": "Katrina Riehl & Jacob Tomlinson - GPU development with Python 101", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json index da86bb47c..aa6897987 100644 --- a/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json +++ b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/3hSjftUjmWk/maxresdefault.jpg", - "title": "KEYNOTE: Peter Wang - Do Python and Data Science Matter in Our AI Future? 
| PyData Global 2024", + "title": "KEYNOTE: Peter Wang - Do Python and Data Science Matter in Our AI Future?", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json index 6bb723060..85fb9e410 100644 --- a/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json +++ b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/1uex29BVbgg/maxresdefault.jpg", - "title": "Leonie Hodel - Using AI to Spot Deforestation-related Cows on Satellite Images | PyData Global 2024", + "title": "Leonie Hodel - Using AI to Spot Deforestation-related Cows on Satellite Images", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json index 48a5206db..60e22e516 100644 --- a/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json +++ b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ZIrq9GsN2HM/maxresdefault.jpg", - "title": "Liam Brannigan - Build simple & scalable data pipelines with Polars & DeltaLake | PyData Global 2024", + "title": "Liam Brannigan - Build simple & scalable data pipelines with Polars & DeltaLake", "videos": [ { "type": "youtube", diff --git 
a/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json index 313789600..3e6c14504 100644 --- a/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json +++ b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/DZICL_8vdXI/maxresdefault.jpg", - "title": "Luca Baggi - Foundational Models for Time Series Forecasting: are we there yet? | PyData Global 2024", + "title": "Luca Baggi - Foundational Models for Time Series Forecasting: are we there yet?", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json index 68155f193..6ba5c8eef 100644 --- a/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json +++ b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/-adJy4MxZgE/maxresdefault.jpg", - "title": "Maarten Breddels - Python Apps in the Browser made simple by PyCafe | PyData Global 2024", + "title": "Maarten Breddels - Python Apps in the Browser made simple by PyCafe", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json 
b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json index 5ef20f203..2dd27ad9f 100644 --- a/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json +++ b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json @@ -22,7 +22,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/GVA1XK0jrf8/maxresdefault.jpg", - "title": "Maniyam & Nielsen - Preparing Data for LLM Applications Using Data Prep Kit | PyData Global 2024", + "title": "Maniyam & Nielsen - Preparing Data for LLM Applications Using Data Prep Kit", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json index ee9dc440e..dfec5b732 100644 --- a/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json +++ b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/thfNEGCuwbY/maxresdefault.jpg", - "title": "Martin Durant- akimbo: vectorized processing of nested/ragged dataframe columns | PyData Global 2024", + "title": "Martin Durant- akimbo: vectorized processing of nested/ragged dataframe columns", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json index 35274daa3..f166e48eb 100644 --- 
a/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json +++ b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/uV3HLROlcLM/maxresdefault.jpg", - "title": "Marysia Winkels - The Data That Shapes Foundational LLMs | PyData Global 2024", + "title": "Marysia Winkels - The Data That Shapes Foundational LLMs", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json index 0e84d904e..2acd5a81c 100644 --- a/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json +++ b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/zBy3-NiylY8/maxresdefault.jpg", - "title": "Matthew Powers - New Features in Apache Spark 4.0 | PyData Global 2024", + "title": "Matthew Powers - New Features in Apache Spark 4.0", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json index 3a5c5b019..fd4fcc7b2 100644 --- a/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json +++ b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/fasxVtDQgK0/maxresdefault.jpg", - "title": "Michael Sarahan - Going Plaid: Striving for Speed of Light in CI 
pipelines | PyData Global 2024", + "title": "Michael Sarahan - Going Plaid: Striving for Speed of Light in CI pipelines", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json index d30a3abee..076190878 100644 --- a/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json +++ b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/NBGJuaBF2rc/maxresdefault.jpg", - "title": "Nicola Rennie - Practical Techniques for Polished Visuals with Plotnine | PyData Global 2024", + "title": "Nicola Rennie - Practical Techniques for Polished Visuals with Plotnine", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json index 7e0475580..f59f5de70 100644 --- a/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json +++ b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/G8ypUIlvlEg/maxresdefault.jpg", - "title": "Nicol\u00f2 Giso - Image Recognition for safety on the factory floor | PyData Global 2024", + "title": "Nicol\u00f2 Giso - Image Recognition for safety on the factory floor", "videos": [ { "type": "youtube", diff --git 
a/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json index 1491a6432..d3391323d 100644 --- a/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json +++ b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/-6vh5vMgPHA/maxresdefault.jpg", - "title": "Nompumelelo Mtsweni- 3D geospatial data visualization using Python and Cesiumjs | PyData Global 2024", + "title": "Nompumelelo Mtsweni- 3D geospatial data visualization using Python and Cesiumjs", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json b/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json index 0674e43d0..516560fe7 100644 --- a/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json +++ b/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/UPSGvCtKYGE/maxresdefault.jpg", - "title": "NumHack 2024: Winners Announcements & Demo Showcase | PyData Global 2024", + "title": "NumHack 2024: Winners Announcements & Demo Showcase", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json index afe0f5518..6b266dec0 100644 --- 
a/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json +++ b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Nrsh6LzUk6A/maxresdefault.jpg", - "title": "Paco Nathan - Catching Bad Guys using open data and open models for graphs | PyData Global 2024", + "title": "Paco Nathan - Catching Bad Guys using open data and open models for graphs", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json index dc351b969..3bdca5606 100644 --- a/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json +++ b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/f5G8OVuRI3k/maxresdefault.jpg", - "title": "Pascal Tomecek - Leveraging CSP for Live Inference | PyData Global 2024", + "title": "Pascal Tomecek - Leveraging CSP for Live Inference", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json index 8c2279e9e..9be02d92e 100644 --- a/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json +++ b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/vxSRIL1WD9g/maxresdefault.jpg", - "title": "Patrick Deziel - 
Putting the data science back into LLM evaluation | PyData Global 2024", + "title": "Patrick Deziel - Putting the data science back into LLM evaluation", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json index c165f01e0..d0f9aa88d 100644 --- a/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json +++ b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/zssnoI2JvTo/maxresdefault.jpg", - "title": "Quan Nguyen - Cost-effective data annotation with Bayesian experimental design | PyData Global 2024", + "title": "Quan Nguyen - Cost-effective data annotation with Bayesian experimental design", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json index 8bc6799f4..a67785efa 100644 --- a/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json +++ b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json @@ -22,7 +22,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/8HwfVVknhP4/maxresdefault.jpg", - "title": "Rodrigo Gir\u00e3o Serr\u00e3o - Understanding Polars data types | PyData Global 2024", + "title": "Rodrigo Gir\u00e3o Serr\u00e3o - Understanding Polars data types", "videos": [ { "type": "youtube", diff --git 
a/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json index 31a389754..c751debc9 100644 --- a/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json +++ b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/oy7sEAfJsWw/maxresdefault.jpg", - "title": "Ryan Varley - Let's get you started with asynchronous programming | PyData Global 2024", + "title": "Ryan Varley - Let's get you started with asynchronous programming", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json index 5c4587462..8a5afa5cf 100644 --- a/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json +++ b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Td5dFdG0wE4/maxresdefault.jpg", - "title": "Sara Zanzottera - Building LLM Voice Bots with Open Source Tools | PyData Global 2024", + "title": "Sara Zanzottera - Building LLM Voice Bots with Open Source Tools", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json index c769a8bb4..3a47f00d4 100644 --- 
a/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json +++ b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/KdgegsH3rAQ/maxresdefault.jpg", - "title": "Sayantika Banik - The LEGO Approach to designing PyData Workflows | PyData Global 2024", + "title": "Sayantika Banik - The LEGO Approach to designing PyData Workflows", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json index 8b1f72e3c..847d352ec 100644 --- a/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json +++ b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ABao7JTDTMI/maxresdefault.jpg", - "title": "Sergey Maydanov - Bringing NVIDIA math libraries to Python scientific community | PyData Global 2024", + "title": "Sergey Maydanov - Bringing NVIDIA math libraries to Python scientific community", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json index 6c655efea..e86665e70 100644 --- a/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json +++ 
b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/eExvPkSqTCQ/maxresdefault.jpg", - "title": "Shivay Lamba - Streamlining AI development and Deployment with KitOps | PyData Global 2024", + "title": "Shivay Lamba - Streamlining AI development and Deployment with KitOps", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json index e05c89808..8ba39104f 100644 --- a/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json +++ b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ca4w2ZIZ0S0/maxresdefault.jpg", - "title": "Shreya Khurana - Realtime Time Series Anomaly Detection in Production | PyData Global 2024", + "title": "Shreya Khurana - Realtime Time Series Anomaly Detection in Production", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json index 86fabcbdf..0de6e6c63 100644 --- a/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json +++ b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/E01yrxkmWvM/maxresdefault.jpg", - 
"title": "Son The Nguyen- Improve LLMs Alignment with Complete and Robust Preference Data | PyData Global 2024", + "title": "Son The Nguyen- Improve LLMs Alignment with Complete and Robust Preference Data", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json index ef9a0aa5c..768c45147 100644 --- a/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json +++ b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/IJuzKZdiLCg/maxresdefault.jpg", - "title": "Timothy Spann - It's in the Air Tonight. Sensor Data in RAG | PyData Global 2024", + "title": "Timothy Spann - It's in the Air Tonight. Sensor Data in RAG", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json index 013af1443..a4cc38afb 100644 --- a/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json +++ b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/GhDRKUT9gZA/maxresdefault.jpg", - "title": "Tun Shwe - Moving from Offline to Online Machine Learning with River | PyData Global 2024", + "title": "Tun Shwe - Moving from Offline to Online Machine Learning with River", "videos": [ { "type": "youtube", diff --git 
a/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json index 2698a2ab1..4f1eb9ab4 100644 --- a/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json +++ b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/__VU52cv6jk/maxresdefault.jpg", - "title": "Vyoma Gajjar- LLMs in Regulated Industries: Challenges and Governance Solutions | PyData Global 2024", + "title": "Vyoma Gajjar- LLMs in Regulated Industries: Challenges and Governance Solutions", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json index 933e20269..cc11e625e 100644 --- a/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json +++ b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/w4aYrav8-zE/maxresdefault.jpg", - "title": "Wes McKinney - Retooling for a Smaller Data Era | PyData Global 2024", + "title": "Wes McKinney - Retooling for a Smaller Data Era", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json index aa48ec2e9..e4827b971 100644 --- 
a/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json +++ b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json @@ -18,7 +18,7 @@ ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/pnacsAWnjV8/maxresdefault.jpg", - "title": "Zain Hasan - ColPali\u2019s Vision-Powered RAG for Enterprise Documents | PyData Global 2024", + "title": "Zain Hasan - ColPali\u2019s Vision-Powered RAG for Enterprise Documents", "videos": [ { "type": "youtube", From 45f21e28fddbd11820edc769fa11287d06175a26 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?= <14986783+ELC@users.noreply.github.com> Date: Fri, 27 Jun 2025 02:17:13 +0000 Subject: [PATCH 4/7] Add Speaker information --- ...-duckdb-and-python-pydata-global-2024.json | 4 +-- ...tching-in-networkx-pydata-global-2024.json | 4 +-- ...deepfake-detection-pydata-global-2024.json | 4 +-- ...he-future-of-forecasting-or-just-hype.json | 4 +-- ...r-vector-databases-pydata-global-2024.json | 5 ++-- ...s-with-statsmodels-pydata-global-2024.json | 4 +-- ...gents-with-structured-text-generation.json | 4 +-- ...zero-to-production-pydata-global-2024.json | 4 +-- ...gression-workflows-pydata-global-2024.json | 4 +-- ...on-and-scalability-pydata-global-2024.json | 4 +-- ...ions-and-ship-fast-pydata-global-2024.json | 4 +-- ...projects-using-nix-pydata-global-2024.json | 4 +-- ...for-fun-and-profit-pydata-global-2024.json | 4 +-- ...n-processes-useful-pydata-global-2024.json | 5 ++-- ...models-for-japanese-medical-documents.json | 4 +-- ...ur-own-transformer-pydata-global-2024.json | 6 ++-- ...-travel-agent-that-never-hallucinates.json | 10 +++++-- ...-decisions-using-r-pydata-global-2024.json | 4 +-- ...nt-with-pixeltable-pydata-global-2024.json | 5 ++-- ...-track-and-optimize-model-performance.json | 4 +-- ...ion-in-the-browser-pydata-global-2024.json | 4 +-- 
...th-modelingtoolkit-pydata-global-2024.json | 4 +-- ...uncertainty-quantification-with-mapie.json | 5 ++-- ...ython-applications-pydata-global-2024.json | 4 +-- ...learning-ecosystem-pydata-global-2024.json | 4 +-- ...ramming-in-data-engineering-workflows.json | 4 +-- ...rew-a-scaling-saga-pydata-global-2024.json | 4 +-- ...coherence-of-open-source-eval-metrics.json | 5 ++-- ...i-agents-with-burr-pydata-global-2024.json | 4 +-- ...y-python-is-a-joke-pydata-global-2024.json | 4 +-- ...our-optima-combine-pydata-global-2024.json | 4 +-- ...e-for-data-science-pydata-global-2024.json | 4 +-- ...losc2-and-caterva2-pydata-global-2024.json | 4 +-- ...usive-load-monitoring-for-iot-devices.json | 4 +-- ...e-of-two-languages-pydata-global-2024.json | 4 +-- ...dence-using-duckdb-pydata-global-2024.json | 4 +-- ...-data-science-in-university-education.json | 4 +-- ...e-at-massive-scale-pydata-global-2024.json | 4 +-- ...-on-with-scalable-serverless-analysis.json | 4 +-- ...-arc-agi-challenge-pydata-global-2024.json | 4 +-- ...e-not-all-you-need-pydata-global-2024.json | 4 +-- ...ython-environments-pydata-global-2024.json | 5 ++-- ...ibrary-development-pydata-global-2024.json | 4 +-- ...els-from-references-to-human-judgment.json | 5 ++-- ...ms-in-r-and-python-pydata-global-2024.json | 4 +-- ...nfair-bias-in-machine-learning-models.json | 4 +-- ...cropython-in-1-2-3-pydata-global-2024.json | 4 +-- ...ics-with-faustream-pydata-global-2024.json | 4 +-- ...tackling-common-data-challenges-in-ml.json | 4 +-- ...nt-with-python-101-pydata-global-2024.json | 5 ++-- ...-and-supercharge-your-pydata-workflow.json | 8 ++++-- ...r-in-our-ai-future-pydata-global-2024.json | 8 ++++-- ...on-changepoint-detection-segmentation.json | 6 ++-- ...tion-via-regression-on-shapley-values.json | 5 ++-- ...ove-agricultural-resilience-in-africa.json | 4 +-- ...n-satellite-images-pydata-global-2024.json | 4 +-- ...h-polars-deltalake-pydata-global-2024.json | 4 +-- 
...mat-for-multi-modal-ai-data-pipelines.json | 5 ++-- ...g-are-we-there-yet-pydata-global-2024.json | 4 +-- ...e-simple-by-pycafe-pydata-global-2024.json | 4 +-- ...impact-of-effort-on-business-outcomes.json | 4 +-- ...sing-data-prep-kit-pydata-global-2024.json | 4 +-- ...d-llm-training-and-inference-pipeline.json | 4 +-- ...-dataframe-columns-pydata-global-2024.json | 4 +-- ...-foundational-llms-pydata-global-2024.json | 4 +-- ...n-apache-spark-4-0-pydata-global-2024.json | 4 +-- ...ht-in-ci-pipelines-pydata-global-2024.json | 4 +-- ...build-a-core-ml-platform-from-scratch.json | 4 +-- ...uals-with-plotnine-pydata-global-2024.json | 4 +-- ...-the-factory-floor-pydata-global-2024.json | 4 +-- ...ython-and-cesiumjs-pydata-global-2024.json | 4 +-- ...can-complete-the-future-of-innovation.json | 4 +-- ...ents-demo-showcase-pydata-global-2024.json | 28 ------------------- ...-models-for-graphs-pydata-global-2024.json | 4 +-- ...for-live-inference-pydata-global-2024.json | 4 +-- ...nto-llm-evaluation-pydata-global-2024.json | 4 +-- ...nd-vector-search-to-empower-retrieval.json | 4 +-- ...xperimental-design-pydata-global-2024.json | 4 +-- ...tching-of-large-datasets-using-splink.json | 4 +-- ...-polars-data-types-pydata-global-2024.json | 4 +-- ...ronous-programming-pydata-global-2024.json | 4 +-- ...-open-source-tools-pydata-global-2024.json | 4 +-- ...lving-role-of-the-r-development-guide.json | 4 +-- ...rategies-for-scientists-and-engineers.json | 4 +-- ...g-pydata-workflows-pydata-global-2024.json | 4 +-- ...ientific-community-pydata-global-2024.json | 4 +-- ...oyment-with-kitops-pydata-global-2024.json | 4 +-- ...tion-in-production-pydata-global-2024.json | 4 +-- ...nce-engagement-and-higher-conversions.json | 4 +-- ...st-preference-data-pydata-global-2024.json | 4 +-- ...sensor-data-in-rag-pydata-global-2024.json | 4 +-- ...ckage-for-efficient-big-data-analysis.json | 4 +-- ...fficiency-personalization-and-insight.json | 4 +-- 
...earning-with-river-pydata-global-2024.json | 4 +-- ...vernance-solutions-pydata-global-2024.json | 4 +-- ...a-smaller-data-era-pydata-global-2024.json | 4 +-- ...terprise-documents-pydata-global-2024.json | 4 +-- ...identify-urgent-messages-in-real-time.json | 4 +-- 98 files changed, 219 insertions(+), 225 deletions(-) delete mode 100644 pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json diff --git a/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json index 1c720ff0c..b5ff1780b 100644 --- a/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json +++ b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Adarsh Namala" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/qSs5ALVbzTk/maxresdefault.jpg", - "title": "Adarsh Namala - Scaling Outside the Warehouse Using DuckDB and Python", + "title": "Scaling Outside the Warehouse Using DuckDB and Python", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json index 0357f5170..f1b60b3ba 100644 --- a/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json +++ b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Aditi Juneja" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2UkZVKj6QGY/maxresdefault.jpg", - "title": "Aditi Juneja - Understanding API 
Dispatching in NetworkX", + "title": "Understanding API Dispatching in NetworkX", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json index 7b55cf96d..4095964ec 100644 --- a/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json +++ b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Adriana Stan" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/MGRmKlDj9rk/maxresdefault.jpg", - "title": "Adriana Stan - Off-the-shelf HuggingFace models for audio deepfake detection", + "title": "Off-the-shelf HuggingFace models for audio deepfake detection", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json b/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json index e4f55aba6..61f3f481f 100644 --- a/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json +++ b/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Ahad Shoaib" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/5Nt0p_3zU7g/maxresdefault.jpg", - "title": "Ahad Shoaib - Foundational Time Series Models in Practice: The Future of Forecasting, or Just Hype?", + "title": "Foundational Time Series Models in Practice: The Future of Forecasting, or Just Hype?", "videos": [ { "type": "youtube", diff 
--git a/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json index 019a894ad..c68deb4df 100644 --- a/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json +++ b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json @@ -14,11 +14,12 @@ } ], "speakers": [ - "TODO" + "Akshay Ballal", + "Sonam Pankaj" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/FdOeLY3rGA8/maxresdefault.jpg", - "title": "Akshay Ballal & Sonam Pankaj-The Memory Efficient Indexing for Vector Databases", + "title": "The Memory Efficient Indexing for Vector Databases", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json index d0c926509..a7771f868 100644 --- a/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json +++ b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Allen Downey" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/foMbacbuAQk/maxresdefault.jpg", - "title": "Allen Downey - Time Series Analysis with StatsModels", + "title": "Time Series Analysis with StatsModels", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json b/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json index 8b0e485e4..51e884398 100644 --- 
a/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json +++ b/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Alonso Silva" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/94yuQKoDKkE/maxresdefault.jpg", - "title": "Alonso Silva - Building Knowledge Graph-Based Agents with Structured Text Generation", + "title": "Building Knowledge Graph-Based Agents with Structured Text Generation", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json index 178a5717d..7510aa742 100644 --- a/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json +++ b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Andrew Weeks" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/FA1TWdxoyV4/maxresdefault.jpg", - "title": "Andrew Weeks - Taking Data Science in industry from zero to production", + "title": "Taking Data Science in industry from zero to production", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json index a8fcb287d..76cfd8413 100644 --- a/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json +++ b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Anton Antonov" ], "tags": 
[], "thumbnail_url": "https://i.ytimg.com/vi/Z2uz7kwBli8/maxresdefault.jpg", - "title": "Anton Antonov - Quantile Regression Workflows", + "title": "Quantile Regression Workflows", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json index b07318144..9457f1baf 100644 --- a/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json +++ b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Art Anderson" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/wn1L3hlYfc0/maxresdefault.jpg", - "title": "Art Anderson - A Deep Dive into Python-Powered Precision and Scalability", + "title": "A Deep Dive into Python-Powered Precision and Scalability", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json index de6e441fb..5295ea865 100644 --- a/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json +++ b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Atin Sanyal" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/3iQFdcVf9jI/maxresdefault.jpg", - "title": "Atin Sanyal- Effective GenAI Evaluations: Mitigate Hallucinations and Ship Fast", + "title": "Effective GenAI Evaluations: Mitigate Hallucinations and Ship 
Fast", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json index cac57989e..9ae429840 100644 --- a/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json +++ b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Avik Basu" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/QgQzxcPZWxA/maxresdefault.jpg", - "title": "Avik Basu - Reproducible Python projects using Nix", + "title": "Reproducible Python projects using Nix", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json index 151ff6fda..dcb60b65b 100644 --- a/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json +++ b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Benjamin Vincent" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ajLPA34upQY/maxresdefault.jpg", - "title": "Benjamin Vincent - Climbing the causal ladder for fun and profit", + "title": "Climbing the causal ladder for fun and profit", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json index 667e24268..7a7a5bdd8 100644 --- 
a/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json +++ b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json @@ -14,11 +14,12 @@ } ], "speakers": [ - "TODO" + "Bill Engels", + "Chris Fonnesbeck" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/fi_S89jgUYU/maxresdefault.jpg", - "title": "Bill Engels & Chris Fonnesbeck - Making Gaussian Processes Useful", + "title": "Making Gaussian Processes Useful", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json b/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json index c785923af..50cf1df72 100644 --- a/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json +++ b/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Bing Wang" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/OitWeFVvShc/maxresdefault.jpg", - "title": "Bing Wang - An Evaluation of Open-Source OCR Models for Japanese Medical Documents", + "title": "An Evaluation of Open-Source OCR Models for Japanese Medical Documents", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json index 082d4efe3..d348150eb 100644 --- a/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json +++ b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json @@ -18,11 +18,13 @@ } ], "speakers": [ - "TODO" + "Sheetal Borar", + "Chuxin Liu", + "Shefali 
Shrivastava" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/TWxD76J5Uho/maxresdefault.jpg", - "title": "Borar, Liu, & Shrivastava - Build Your Own Transformer", + "title": "Build Your Own Transformer", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json b/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json index 39f372408..624d83183 100644 --- a/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json +++ b/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json @@ -14,11 +14,15 @@ } ], "speakers": [ - "TODO" + "Hugo Bowne-Anderson", + "Alan Nichol", + "Justina Petraitytė" + ], + "tags": [ + "tutorial" ], - "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/V7HQCMcaJ8A/maxresdefault.jpg", - "title": "Bowne-Anderson, Nichol, & Petraityt\u0117 - Building an AI Travel Agent That Never Hallucinates", + "title": "Building an AI Travel Agent That Never Hallucinates", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json index 0d9a27f0b..e3911eceb 100644 --- a/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json +++ b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Jeremy Horne" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Pq5VhosMJQE/maxresdefault.jpg", - "title": "Brookes & Horne - Dashboards to Aid British Government Decisions (using R)", + 
"title": "Dashboards to Aid British Government Decisions (using R)", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json index 3e4836d93..f499783fd 100644 --- a/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json +++ b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json @@ -14,11 +14,12 @@ } ], "speakers": [ - "TODO" + "Pierre Brunelle", + "Marcel Kornacker" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/C7_nw2Rebfs/maxresdefault.jpg", - "title": "Brunelle & Kornacker - Hands-on Multimodal AI Development with Pixeltable", + "title": "Hands-on Multimodal AI Development with Pixeltable", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json b/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json index 8f75642de..740f10e7c 100644 --- a/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json +++ b/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Cainã Max Couto da Silva" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/pzSzhn9H6X4/maxresdefault.jpg", - "title": "Cain\u00e3 Max Couto da Silva - PyTorch Workflow Mastery: A Guide to Track and Optimize Model Performance", + "title": "PyTorch Workflow Mastery: A Guide to Track and Optimize Model Performance", "videos": [ { "type": "youtube", diff 
--git a/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json index b51de941b..218d8b022 100644 --- a/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json +++ b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Chris Laffra" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/J2XOSdDWPIo/maxresdefault.jpg", - "title": "Chris Laffra - PyScript - Writing a Python application in the browser", + "title": "PyScript - Writing a Python application in the browser", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json index 598add43d..e8fb17a78 100644 --- a/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json +++ b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Chris Rackauckas" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/yW4oU-7_tGE/maxresdefault.jpg", - "title": "Chris Rackauckas - Open Source Component-Based Modeling with ModelingToolkit", + "title": "Open Source Component-Based Modeling with ModelingToolkit", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json 
b/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json index ace810dc4..c3e568abe 100644 --- a/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json +++ b/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json @@ -14,11 +14,12 @@ } ], "speakers": [ - "TODO" + "Hussein Jawad", + "Valentin Laurent" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ZkLIWS9dlZI/maxresdefault.jpg", - "title": "Cordier, Jawad, & Laurent - Boosting AI Reliability: Uncertainty Quantification with MAPIE", + "title": "Boosting AI Reliability: Uncertainty Quantification with MAPIE", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json index 8a0ed42b8..fa5dc4cd9 100644 --- a/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json +++ b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Daniel Chen" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2Cst7_s_4H8/maxresdefault.jpg", - "title": "Daniel Chen - Tips to Level-Up Your Shiny for Python Applications", + "title": "Tips to Level-Up Your Shiny for Python Applications", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json index 5dbf9425e..5e46412f0 100644 --- 
a/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json +++ b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Daniel Molina" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/17Jm_Gqv3K8/maxresdefault.jpg", - "title": "Daniel Molina - Discover the Julia Machine Learning Ecosystem", + "title": "Discover the Julia Machine Learning Ecosystem", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json b/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json index f873af889..cfac95fe5 100644 --- a/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json +++ b/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Daphné Grasselly" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/7xrDlgaz-QM/maxresdefault.jpg", - "title": "Daphn\u00e9 Grasselly - Enabling Multi-Language Programming in Data Engineering Workflows", + "title": "Enabling Multi-Language Programming in Data Engineering Workflows", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json index db0515b2d..54d1caff2 100644 --- a/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json +++ b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Duarte Carmo" 
], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/xDcGtPgxXEk/maxresdefault.jpg", - "title": "Duarte Carmo - Panel: The Dashboard That Grew - A Scaling Saga", + "title": "Panel: The Dashboard That Grew - A Scaling Saga", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json b/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json index 11fc64ab1..cfddcb992 100644 --- a/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json +++ b/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json @@ -14,11 +14,12 @@ } ], "speakers": [ - "TODO" + "Nour El Mawass", + "Joe Neeman" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/hCCJoJ5URD0/maxresdefault.jpg", - "title": "El Mawass & Neeman - Evaluating RAGs: On the correctness and coherence of Open Source eval metrics", + "title": "Evaluating RAGs: On the correctness and coherence of Open Source eval metrics", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json index ff6428961..4e803e97d 100644 --- a/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json +++ b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Elijah ben Izzy" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/3Ks02G18anA/maxresdefault.jpg", - "title": "Elijah ben Izzy - Build Production Ready AI Agents with Burr", + "title": "Build Production Ready AI 
Agents with Burr", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json index 97256a178..2db55a008 100644 --- a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json +++ b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Evan Wimpey" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/iJp12vplXAc/maxresdefault.jpg", - "title": "Evan Wimpey - Python is a Joke!", + "title": "Python is a Joke!", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json index e5618d622..8428c9ebc 100644 --- a/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json +++ b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Eyal Gruss" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/bl13uhchJVA/maxresdefault.jpg", - "title": "Eyal Gruss - Let our optima combine!", + "title": "Let our optima combine!", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json index a04bf05a3..24450cfbe 100644 --- a/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json +++ b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json @@ -18,11 +18,11 @@ } ], "speakers": [ - "TODO" + "Eyal Kazin" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/POMePoP8M-w/maxresdefault.jpg", - "title": 
"Eyal Kazin - \ud83e\udde0\ud83e\uddf9 Causality - Mental Hygiene for Data Science", + "title": "🧠🧹 Causality - Mental Hygiene for Data Science", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json index afd7ce3b0..a3386548f 100644 --- a/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json +++ b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json @@ -18,11 +18,11 @@ } ], "speakers": [ - "TODO" + "Francesc Alted" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/aR-i_a3nGx0/maxresdefault.jpg", - "title": "Francesc Alted - Mastering Large NDArray Handling with Blosc2 and Caterva2", + "title": "Mastering Large NDArray Handling with Blosc2 and Caterva2", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json b/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json index 07e25e11a..bc82a9a1c 100644 --- a/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json +++ b/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Francesco Conti" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/cMtYqUqdzsA/maxresdefault.jpg", - "title": "Francesco Conti - Deep Learning in Energy Management: Non-Intrusive Load Monitoring for IoT Devices", + "title": "Deep Learning in Energy Management: Non-Intrusive Load 
Monitoring for IoT Devices", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json index f15628fbc..742131834 100644 --- a/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json +++ b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Guillaume Dalle" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/4sF-Wm8w31c/maxresdefault.jpg", - "title": "Guillaume Dalle - Automatic differentiation, a tale of two languages", + "title": "Automatic differentiation, a tale of two languages", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json index ab34afdf7..38810e3e0 100644 --- a/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json +++ b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Hannes Mühleisen" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/7UqLMHloTsQ/maxresdefault.jpg", - "title": "Hannes M\u00fchleisen - Changing Data With Confidence using DuckDB", + "title": "Changing Data With Confidence using DuckDB", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json 
b/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json index 46efc5a83..876707501 100644 --- a/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json +++ b/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Hansila Sudasinghe" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/r3Diqvfy4Fo/maxresdefault.jpg", - "title": "Hansila Sudasinghe - PYDATA Bloom Framework: An Approach to Data Science in University Education", + "title": "PYDATA Bloom Framework: An Approach to Data Science in University Education", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json index aaffff3b3..194942a17 100644 --- a/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json +++ b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Hendrik Makait" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/KJxJRx7KQtc/maxresdefault.jpg", - "title": "Hendrik Makait - Dask \u2764\ufe0f Xarray: Geoscience at Massive Scale", + "title": "Dask ❤ Xarray: Geoscience at Massive Scale", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json index 4d3dbaad4..1acac5f43 100644 --- 
a/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json +++ b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Tim Swena" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2D5-7zIeOQ4/maxresdefault.jpg", - "title": "Hsia, Swena & Williams- Python + BigQuery + DataFrames: Hands on with scalable \"serverless\" analysis", + "title": "Python + BigQuery + DataFrames: Hands on with scalable \"serverless\" analysis", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json index edfb2266f..173b5de53 100644 --- a/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json +++ b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Ian Ozsvald" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ft_PYi8A93M/maxresdefault.jpg", - "title": "Ian Ozsvald - Valuable LLM lessons learnt on Kaggle's ARC AGI Challenge", + "title": "Valuable LLM lessons learnt on Kaggle's ARC AGI Challenge", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json index 1794907c7..767aa9c64 100644 --- a/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json +++
b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Irina Vidal Migallón" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/o3dBxo6fgcA/maxresdefault.jpg", - "title": "Irina Vidal Migall\u00f3n - Trustworthy LLMs: Vibe checks are not all you need", + "title": "Trustworthy LLMs: Vibe checks are not all you need", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json b/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json index ec700f77d..414b4f70a 100644 --- a/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json +++ b/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json @@ -14,11 +14,12 @@ } ], "speakers": [ - "TODO" + "Jacob Tomlinson", + "Melody Wang" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ghq-VDNvNss/maxresdefault.jpg", - "title": "Jacob Tomlinson & Melody Wang- The art of wrangling your GPU Python environments |PyData Global 2024", + "title": "The art of wrangling your GPU Python environments", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json index e7e98b74a..990cedb8a 100644 --- a/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json +++ b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + 
"Jeff Bezanson" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/LluyXFj9YDI/maxresdefault.jpg", - "title": "Jeff Bezanson - Statically-Compiled Julia for Library Development", + "title": "Statically-Compiled Julia for Library Development", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json b/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json index 41f409496..e774a0e09 100644 --- a/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json +++ b/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json @@ -14,11 +14,12 @@ } ], "speakers": [ - "TODO" + "Parin Jhaveri", + "Riya Joshi" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ObFGLVEPYoc/maxresdefault.jpg", - "title": "Jhaveri & Joshi - Holistic Evaluation of Large Language Models: From References to Human Judgment", + "title": "Holistic Evaluation of Large Language Models: From References to Human Judgment", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json index d8e6e2b2e..ba79fec67 100644 --- a/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json +++ b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "John Mount" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/7H44aJuK0Yg/maxresdefault.jpg", - "title": "John Mount - Solving Forecasting Problems in R and Python", + "title": "Solving Forecasting Problems in 
R and Python", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json b/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json index d6810aceb..a43b9f624 100644 --- a/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json +++ b/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "John Sandall" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/G1I45toaVSs/maxresdefault.jpg", - "title": "John Sandall - Fairness Tales: How To Measure And Mitigate Unfair Bias in Machine Learning Models", + "title": "Fairness Tales: How To Measure And Mitigate Unfair Bias in Machine Learning Models", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json index d7aa65920..327b00be3 100644 --- a/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json +++ b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Jon Nordby" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/nCmBJJHGQKo/maxresdefault.jpg", - "title": "Jon Nordby - Microcontrollers + Machine Learning with MicroPython in 1-2-3", + "title": "Microcontrollers + Machine Learning with MicroPython in 1-2-3", "videos": [ { "type": "youtube", diff --git 
a/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json index fb93f304f..dcefcd510 100644 --- a/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json +++ b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Joseph Oladokun" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/exHRTSGZtAo/maxresdefault.jpg", - "title": "Joseph Oladokun-Bridging the Gap: Real-Time Predictive Analytics with Faustream", + "title": "Bridging the Gap: Real-Time Predictive Analytics with Faustream", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json b/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json index a9cce1961..7695805c1 100644 --- a/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json +++ b/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Kalyan Prasad" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2ugMOAYwLpQ/maxresdefault.jpg", - "title": "Kalyan Prasad - The Hidden Costs of Data Quality - Tackling Common Data Challenges in ML", + "title": "The Hidden Costs of Data Quality - Tackling Common Data Challenges in ML", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json 
b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json index 58de09927..2ecb783cf 100644 --- a/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json +++ b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json @@ -14,11 +14,12 @@ } ], "speakers": [ - "TODO" + "Katrina Riehl", + "Jacob Tomlinson" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/rfXgtUYF3lw/maxresdefault.jpg", - "title": "Katrina Riehl & Jacob Tomlinson - GPU development with Python 101", + "title": "GPU development with Python 101", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json b/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json index 3cfc6e353..f9e7ad9ff 100644 --- a/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json +++ b/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json @@ -18,11 +18,13 @@ } ], "speakers": [ - "TODO" + "Jeroen Janssens" + ], + "tags": [ + "Keynote" ], - "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/siPGvvrfylQ/maxresdefault.jpg", - "title": "KEYNOTE: Dr. 
Jeroen Janssens - Embrace the Unix Command Line and Supercharge Your PyData Workflow", + "title": "KEYNOTE: Embrace the Unix Command Line and Supercharge Your PyData Workflow", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json index aa6897987..610492c74 100644 --- a/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json +++ b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json @@ -14,11 +14,13 @@ } ], "speakers": [ - "TODO" + "Peter Wang" + ], + "tags": [ + "Keynote" ], - "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/3hSjftUjmWk/maxresdefault.jpg", - "title": "KEYNOTE: Peter Wang - Do Python and Data Science Matter in Our AI Future?", + "title": "Do Python and Data Science Matter in Our AI Future?", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json b/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json index 77a53ad36..b4108d48c 100644 --- a/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json +++ b/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json @@ -14,11 +14,13 @@ } ], "speakers": [ - "TODO" + "Franz Kiraly", + "Christopher Risi", + "Martin Tveten" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/VwhevNkxjYw/maxresdefault.jpg", - "title": "Kiraly, Risi, & Tveten - sktime: time series anomaly detection, changepoint detection, segmentation", + "title": "sktime: 
time series anomaly detection, changepoint detection, segmentation", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json b/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json index 0010c5e62..99837568f 100644 --- a/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json +++ b/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json @@ -14,11 +14,12 @@ } ], "speakers": [ - "TODO" + "Baran Köseoğlu", + "Egor Kraev" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/pmqvyrIyB_8/maxresdefault.jpg", - "title": "Koseoglu & Kraev - Fast, intuitive feature selection via regression on Shapley values", + "title": "Fast, intuitive feature selection via regression on Shapley values", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json b/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json index e894ba3e8..905c7cfa7 100644 --- a/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json +++ b/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Kristal Joi Wise" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/wMwEmlhyYh0/maxresdefault.jpg", - "title": "Kristal Joi Wise - Harnessing Machine Learning to Improve Agricultural Resilience in Africa", + "title": "Harnessing Machine Learning to Improve Agricultural Resilience in Africa", "videos": [ { "type": "youtube", diff --git 
a/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json index 85fb9e410..026cf3219 100644 --- a/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json +++ b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Leonie Hodel" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/1uex29BVbgg/maxresdefault.jpg", - "title": "Leonie Hodel - Using AI to Spot Deforestation-related Cows on Satellite Images", + "title": "Using AI to Spot Deforestation-related Cows on Satellite Images", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json index 60e22e516..a5dcadb97 100644 --- a/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json +++ b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Liam Brannigan" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ZIrq9GsN2HM/maxresdefault.jpg", - "title": "Liam Brannigan - Build simple & scalable data pipelines with Polars & DeltaLake", + "title": "Build simple & scalable data pipelines with Polars & DeltaLake", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json 
b/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json index d8c59e3d3..a1bb03295 100644 --- a/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json +++ b/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json @@ -14,11 +14,12 @@ } ], "speakers": [ - "TODO" + "Lu Qiu", + "Allison Wang" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/noZNcpYRrkk/maxresdefault.jpg", - "title": "Lu Qiu & Allison Wang - Empowering PySpark with Lance Format for Multi-Modal AI Data Pipelines", + "title": "Empowering PySpark with Lance Format for Multi-Modal AI Data Pipelines", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json index 3e6c14504..c0290f011 100644 --- a/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json +++ b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Luca Baggi" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/DZICL_8vdXI/maxresdefault.jpg", - "title": "Luca Baggi - Foundational Models for Time Series Forecasting: are we there yet?", + "title": "Foundational Models for Time Series Forecasting: are we there yet?", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json index 
6ba5c8eef..85d1ebe8c 100644 --- a/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json +++ b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Maarten Breddels" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/-adJy4MxZgE/maxresdefault.jpg", - "title": "Maarten Breddels - Python Apps in the Browser made simple by PyCafe", + "title": "Python Apps in the Browser made simple by PyCafe", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json b/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json index 0d8407852..2f4018091 100644 --- a/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json +++ b/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Maggie Wolff" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2-0iWgVC2oc/maxresdefault.jpg", - "title": "Maggie Wolff - Measuring the User Experience and the Impact of Effort on Business Outcomes", + "title": "Measuring the User Experience and the Impact of Effort on Business Outcomes", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json index 2dd27ad9f..8d1431737 100644 --- a/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json +++ 
b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json @@ -18,11 +18,11 @@ } ], "speakers": [ - "TODO" + "Sujee Maniyam" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/GVA1XK0jrf8/maxresdefault.jpg", - "title": "Maniyam & Nielsen - Preparing Data for LLM Applications Using Data Prep Kit", + "title": "Preparing Data for LLM Applications Using Data Prep Kit", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json b/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json index fb0120232..e02e73b70 100644 --- a/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json +++ b/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Mark Moyou" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/V2L6hufE2X4/maxresdefault.jpg", - "title": "Mark Moyou, PhD - Understanding the end-to-end LLM training and inference pipeline", + "title": "Understanding the end-to-end LLM training and inference pipeline", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json index dfec5b732..97e710905 100644 --- a/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json +++ b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Martin Durant" ], "tags": [], 
"thumbnail_url": "https://i.ytimg.com/vi/thfNEGCuwbY/maxresdefault.jpg", - "title": "Martin Durant- akimbo: vectorized processing of nested/ragged dataframe columns", + "title": "akimbo: vectorized processing of nested/ragged dataframe columns", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json index f166e48eb..341a206d7 100644 --- a/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json +++ b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Marysia Winkels" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/uV3HLROlcLM/maxresdefault.jpg", - "title": "Marysia Winkels - The Data That Shapes Foundational LLMs", + "title": "The Data That Shapes Foundational LLMs", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json index 2acd5a81c..d6f8a3ebd 100644 --- a/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json +++ b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Matthew Powers" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/zBy3-NiylY8/maxresdefault.jpg", - "title": "Matthew Powers - New Features in Apache Spark 4.0", + "title": "New Features in Apache Spark 4.0", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json 
b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json index fd4fcc7b2..f9430ae9e 100644 --- a/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json +++ b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Michael Sarahan" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/fasxVtDQgK0/maxresdefault.jpg", - "title": "Michael Sarahan - Going Plaid: Striving for Speed of Light in CI pipelines", + "title": "Going Plaid: Striving for Speed of Light in CI pipelines", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json b/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json index 052f02eb5..974a1b71e 100644 --- a/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json +++ b/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Nathan Colbert" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/XkNeCavaJtw/maxresdefault.jpg", - "title": "Nathan Colbert - From Inference to Features: Build a Core ML Platform from Scratch", + "title": "From Inference to Features: Build a Core ML Platform from Scratch", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json index 076190878..b6aec2154 100644 --- 
a/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json +++ b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Nicola Rennie" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/NBGJuaBF2rc/maxresdefault.jpg", - "title": "Nicola Rennie - Practical Techniques for Polished Visuals with Plotnine", + "title": "Practical Techniques for Polished Visuals with Plotnine", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json index f59f5de70..a9e009bc0 100644 --- a/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json +++ b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Nicolò Giso" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/G8ypUIlvlEg/maxresdefault.jpg", - "title": "Nicol\u00f2 Giso - Image Recognition for safety on the factory floor", + "title": "Image Recognition for safety on the factory floor", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json index d3391323d..999941ed1 100644 --- a/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json +++ 
b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Nompumelelo Mtsweni" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/-6vh5vMgPHA/maxresdefault.jpg", - "title": "Nompumelelo Mtsweni- 3D geospatial data visualization using Python and Cesiumjs", + "title": "3D geospatial data visualization using Python and Cesiumjs", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json b/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json index 02a5ccf3c..c2081f5ca 100644 --- a/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json +++ b/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Noor Aftab" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2k72xRc67wE/maxresdefault.jpg", - "title": "Noor Aftab - The Missing 78%: How Women in AI & Data Can Complete the Future of Innovation", + "title": "The Missing 78%: How Women in AI & Data Can Complete the Future of Innovation", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json b/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json deleted file mode 100644 index 516560fe7..000000000 --- a/pydata-global-2024/videos/numhack-2024-winners-announcements-demo-showcase-pydata-global-2024.json +++ /dev/null @@ -1,28 +0,0 @@ -{ - "description": "www.pydata.org\n\nJoin us to celebrate the innovative minds behind NumHack 2024!\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United 
States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", - "duration": 2542, - "language": "eng", - "recorded": "2024-12-03", - "related_urls": [ - { - "label": "Conference Website", - "url": "https://pydata.org/global2024" - }, - { - "label": "https://github.com/numfocus/YouTubeVideoTimestamps", - "url": "https://github.com/numfocus/YouTubeVideoTimestamps" - } - ], - "speakers": [ - "TODO" - ], - "tags": [], - "thumbnail_url": "https://i.ytimg.com/vi/UPSGvCtKYGE/maxresdefault.jpg", - "title": "NumHack 2024: Winners Announcements & Demo Showcase", - "videos": [ - { - "type": "youtube", - "url": "https://www.youtube.com/watch?v=UPSGvCtKYGE" - } - ] -} diff --git a/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json index 6b266dec0..f4cefd567 100644 --- a/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json +++ 
b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Paco Nathan" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Nrsh6LzUk6A/maxresdefault.jpg", - "title": "Paco Nathan - Catching Bad Guys using open data and open models for graphs", + "title": "Catching Bad Guys using open data and open models for graphs", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json index 3bdca5606..284faea67 100644 --- a/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json +++ b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Pascal Tomecek" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/f5G8OVuRI3k/maxresdefault.jpg", - "title": "Pascal Tomecek - Leveraging CSP for Live Inference", + "title": "Leveraging CSP for Live Inference", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json index 9be02d92e..92fcb8468 100644 --- a/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json +++ b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Patrick Deziel" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/vxSRIL1WD9g/maxresdefault.jpg", - "title": "Patrick Deziel - Putting the data science back into LLM evaluation", + "title": 
"Putting the data science back into LLM evaluation", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json b/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json index 13be54023..a4c6b4931 100644 --- a/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json +++ b/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Prashanth Rao" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ky2yufsffas/maxresdefault.jpg", - "title": "Prashanth Rao - Graph RAG: Bringing together graph and vector search to empower retrieval", + "title": "Graph RAG: Bringing together graph and vector search to empower retrieval", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json index d0f9aa88d..84809afae 100644 --- a/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json +++ b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Quan Nguyen" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/zssnoI2JvTo/maxresdefault.jpg", - "title": "Quan Nguyen - Cost-effective data annotation with Bayesian experimental design", + "title": "Cost-effective data annotation with Bayesian experimental design", "videos": [ { "type": "youtube", diff --git 
a/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json b/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json index 745da91e6..23d39c1bf 100644 --- a/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json +++ b/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json @@ -18,11 +18,11 @@ } ], "speakers": [ - "TODO" + "Robin Linacre" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/eQtFkI8f02U/maxresdefault.jpg", - "title": "Robin Linacre - Rapid deduplication and fuzzy matching of large datasets using Splink", + "title": "Rapid deduplication and fuzzy matching of large datasets using Splink", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json index a67785efa..17f1d2f7f 100644 --- a/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json +++ b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json @@ -18,11 +18,11 @@ } ], "speakers": [ - "TODO" + "Rodrigo Girão Serrão" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/8HwfVVknhP4/maxresdefault.jpg", - "title": "Rodrigo Gir\u00e3o Serr\u00e3o - Understanding Polars data types", + "title": "Understanding Polars data types", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json index c751debc9..2ba75d946 100644 --- 
a/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json +++ b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Ryan Varley" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/oy7sEAfJsWw/maxresdefault.jpg", - "title": "Ryan Varley - Let's get you started with asynchronous programming", + "title": "Let's get you started with asynchronous programming", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json index 8a5afa5cf..2a6258fcd 100644 --- a/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json +++ b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Sara Zanzottera" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/Td5dFdG0wE4/maxresdefault.jpg", - "title": "Sara Zanzottera - Building LLM Voice Bots with Open Source Tools", + "title": "Building LLM Voice Bots with Open Source Tools", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json b/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json index 463a09193..7b72b231c 100644 --- a/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json +++ b/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json @@ -18,11 +18,11 @@ } ], 
"speakers": [ - "TODO" + "Saranjeet Kaur Bhogal" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/l5BwwvQlwG0/maxresdefault.jpg", - "title": "Saranjeet Kaur Bhogal - Empowering New Contributors: The Evolving Role of the R Development Guide", + "title": "Empowering New Contributors: The Evolving Role of the R Development Guide", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json b/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json index 50513bf07..1ff436e36 100644 --- a/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json +++ b/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Saurabh Garg" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/YYZ6vcumojo/maxresdefault.jpg", - "title": "Saurabh Garg - Navigating Cloud Expenses in Data & AI: Strategies for Scientists and Engineers", + "title": "Navigating Cloud Expenses in Data & AI: Strategies for Scientists and Engineers", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json index 3a47f00d4..345bce3c1 100644 --- a/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json +++ b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Sayantika Banik" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/KdgegsH3rAQ/maxresdefault.jpg", - "title": "Sayantika 
Banik - The LEGO Approach to designing PyData Workflows", + "title": "The LEGO Approach to designing PyData Workflows", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json index 847d352ec..3f0e639d8 100644 --- a/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json +++ b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Sergey Maydanov" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ABao7JTDTMI/maxresdefault.jpg", - "title": "Sergey Maydanov - Bringing NVIDIA math libraries to Python scientific community", + "title": "Bringing NVIDIA math libraries to Python scientific community", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json index e86665e70..71db684be 100644 --- a/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json +++ b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Shivay Lamba" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/eExvPkSqTCQ/maxresdefault.jpg", - "title": "Shivay Lamba - Streamlining AI development and Deployment with KitOps", + "title": "Streamlining AI development and Deployment with KitOps", "videos": [ { "type": "youtube", diff --git 
a/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json index 8ba39104f..9672b7013 100644 --- a/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json +++ b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Shreya Khurana" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/ca4w2ZIZ0S0/maxresdefault.jpg", - "title": "Shreya Khurana - Realtime Time Series Anomaly Detection in Production", + "title": "Realtime Time Series Anomaly Detection in Production", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json b/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json index e03152ea5..7abd8a791 100644 --- a/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json +++ b/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Shrikanth Singh" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/d1SaxtX7L6E/maxresdefault.jpg", - "title": "Shrikanth Singh - Automating SEA Retargeting for Smarter Audience Engagement and Higher Conversions", + "title": "Automating SEA Retargeting for Smarter Audience Engagement and Higher Conversions", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json 
b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json index 0de6e6c63..86fb183ff 100644 --- a/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json +++ b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Son The Nguyen" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/E01yrxkmWvM/maxresdefault.jpg", - "title": "Son The Nguyen- Improve LLMs Alignment with Complete and Robust Preference Data", + "title": "Improve LLMs Alignment with Complete and Robust Preference Data", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json index 768c45147..261dad5f7 100644 --- a/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json +++ b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Timothy Spann" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/IJuzKZdiLCg/maxresdefault.jpg", - "title": "Timothy Spann - It's in the Air Tonight. Sensor Data in RAG", + "title": "It's in the Air Tonight. 
Sensor Data in RAG", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json b/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json index fd61b7642..37ab83837 100644 --- a/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json +++ b/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json @@ -18,11 +18,11 @@ } ], "speakers": [ - "TODO" + "Toby Dylan Hocking" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/l_7FXnppu-g/maxresdefault.jpg", - "title": "Toby Dylan Hocking- Using and contributing to the data.table package for efficient big data analysis", + "title": "Using and contributing to the data.table package for efficient big data analysis", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json b/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json index a11011158..c29aef583 100644 --- a/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json +++ b/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Tony Ojeda" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/I7jVu-fHavI/maxresdefault.jpg", - "title": "Tony Ojeda - Generative AI + Python: Unlocking Efficiency, Personalization, and Insight", + "title": "Generative AI + Python: Unlocking Efficiency, Personalization, and Insight", "videos": [ { "type": "youtube", diff --git 
a/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json index a4cc38afb..9cf0be601 100644 --- a/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json +++ b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Tun Shwe" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/GhDRKUT9gZA/maxresdefault.jpg", - "title": "Tun Shwe - Moving from Offline to Online Machine Learning with River", + "title": "Moving from Offline to Online Machine Learning with River", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json index 4f1eb9ab4..5e2502274 100644 --- a/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json +++ b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Vyoma Gajjar" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/__VU52cv6jk/maxresdefault.jpg", - "title": "Vyoma Gajjar- LLMs in Regulated Industries: Challenges and Governance Solutions", + "title": "LLMs in Regulated Industries: Challenges and Governance Solutions", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json 
index cc11e625e..047cd00f0 100644 --- a/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json +++ b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Wes McKinney" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/w4aYrav8-zE/maxresdefault.jpg", - "title": "Wes McKinney - Retooling for a Smaller Data Era", + "title": "Retooling for a Smaller Data Era", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json index e4827b971..6ab6e6d19 100644 --- a/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json +++ b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Zain Hasan" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/pnacsAWnjV8/maxresdefault.jpg", - "title": "Zain Hasan - ColPali\u2019s Vision-Powered RAG for Enterprise Documents", + "title": "ColPali’s Vision-Powered RAG for Enterprise Documents", "videos": [ { "type": "youtube", diff --git a/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json b/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json index 2589489f7..15fb4b701 100644 --- a/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json +++ b/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json @@ -14,11 +14,11 @@ } ], "speakers": [ - "TODO" + "Zhen (Tony) Zhao" ], "tags": [], "thumbnail_url": 
"https://i.ytimg.com/vi/Lb0ecRiz4xE/maxresdefault.jpg", - "title": "Zhen (Tony) Zhao - Training Language Models to Identify Urgent Messages in Real-Time", + "title": "Training Language Models to Identify Urgent Messages in Real-Time", "videos": [ { "type": "youtube", From 8f2c7149bc49ab12eb98632523ec7bf038565b9a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?= <14986783+ELC@users.noreply.github.com> Date: Fri, 27 Jun 2025 02:18:10 +0000 Subject: [PATCH 5/7] Fix conflicting characters --- .../videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json index 2db55a008..9c58c2e8f 100644 --- a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json +++ b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nEnjoy some data-driven laughs with Evan Wimpey, a data and analytics comedian (and we're not just talking about his coding skills). No data topic is off-limits, so come enjoy some of the funniest jokes ever told at a data conference.*\n\n*Note the baseline\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "www.pydata.org\n\nEnjoy some data-driven laughs with Evan Wimpey, a data and analytics comedian (and we're not just talking about his coding skills). No data topic is off-limits, so come enjoy some of the funniest jokes ever told at a data conference.\n\nNote the baseline\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1699, "language": "eng", "recorded": "2024-12-03", From bf6eab21581341e960d934716da5232554a97f4e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20Leonardo=20Casta=C3=B1o?= <14986783+ELC@users.noreply.github.com> Date: Fri, 27 Jun 2025 02:27:41 +0000 Subject: [PATCH 6/7] Fix conflicting characters --- ...y-dataframes-hands-on-with-scalable-serverless-analysis.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json index 1acac5f43..1d1601d94 100644 --- a/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json +++ b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json @@ -14,7 +14,7 @@ } ], "speakers": [ - "Tim Swena", + "Tim Swena" ], "tags": [], "thumbnail_url": "https://i.ytimg.com/vi/2D5-7zIeOQ4/maxresdefault.jpg", From 4f0bea3e23a841f8e6313f3e11a90847badb5610 Mon Sep 17 00:00:00 2001 From: Jon Banafato Date: Tue, 27 Jan 2026 22:04:35 -0500 Subject: [PATCH 7/7] Remove boilerplate text from PyData Global 2024 talk descriptions --- ...he-warehouse-using-duckdb-and-python-pydata-global-2024.json | 2 +- ...standing-api-dispatching-in-networkx-pydata-global-2024.json | 2 +- ...-models-for-audio-deepfake-detection-pydata-global-2024.json | 2 +- ...dels-in-practice-the-future-of-forecasting-or-just-hype.json | 2 +- ...icient-indexing-for-vector-databases-pydata-global-2024.json | 2 +- ...ime-series-analysis-with-statsmodels-pydata-global-2024.json | 2 +- ...edge-graph-based-agents-with-structured-text-generation.json | 2 +- 
...-in-industry-from-zero-to-production-pydata-global-2024.json | 2 +- ...ntonov-quantile-regression-workflows-pydata-global-2024.json | 2 +- ...on-powered-precision-and-scalability-pydata-global-2024.json | 2 +- ...itigate-hallucinations-and-ship-fast-pydata-global-2024.json | 2 +- ...producible-python-projects-using-nix-pydata-global-2024.json | 2 +- ...the-causal-ladder-for-fun-and-profit-pydata-global-2024.json | 2 +- ...eck-making-gaussian-processes-useful-pydata-global-2024.json | 2 +- ...f-open-source-ocr-models-for-japanese-medical-documents.json | 2 +- ...rivastava-build-your-own-transformer-pydata-global-2024.json | 2 +- ...yte-building-an-ai-travel-agent-that-never-hallucinates.json | 2 +- ...british-government-decisions-using-r-pydata-global-2024.json | 2 +- ...modal-ai-development-with-pixeltable-pydata-global-2024.json | 2 +- ...mastery-a-guide-to-track-and-optimize-model-performance.json | 2 +- ...-a-python-application-in-the-browser-pydata-global-2024.json | 2 +- ...-based-modeling-with-modelingtoolkit-pydata-global-2024.json | 2 +- ...ng-ai-reliability-uncertainty-quantification-with-mapie.json | 2 +- ...p-your-shiny-for-python-applications-pydata-global-2024.json | 2 +- ...the-julia-machine-learning-ecosystem-pydata-global-2024.json | 2 +- ...ulti-language-programming-in-data-engineering-workflows.json | 2 +- ...e-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json | 2 +- ...e-correctness-and-coherence-of-open-source-eval-metrics.json | 2 +- ...production-ready-ai-agents-with-burr-pydata-global-2024.json | 2 +- .../videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json | 2 +- .../eyal-gruss-let-our-optima-combine-pydata-global-2024.json | 2 +- ...lity-mental-hygiene-for-data-science-pydata-global-2024.json | 2 +- ...ay-handling-with-blosc2-and-caterva2-pydata-global-2024.json | 2 +- ...anagement-non-intrusive-load-monitoring-for-iot-devices.json | 2 +- ...ferentiation-a-tale-of-two-languages-pydata-global-2024.json | 2 +- 
...ng-data-with-confidence-using-duckdb-pydata-global-2024.json | 2 +- ...ork-an-approach-to-data-science-in-university-education.json | 2 +- ...k-xarray-geoscience-at-massive-scale-pydata-global-2024.json | 2 +- ...y-dataframes-hands-on-with-scalable-serverless-analysis.json | 2 +- ...learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json | 2 +- ...lms-vibe-checks-are-not-all-you-need-pydata-global-2024.json | 2 +- ...angling-your-gpu-python-environments-pydata-global-2024.json | 2 +- ...mpiled-julia-for-library-development-pydata-global-2024.json | 2 +- ...large-language-models-from-references-to-human-judgment.json | 2 +- ...forecasting-problems-in-r-and-python-pydata-global-2024.json | 2 +- ...ure-and-mitigate-unfair-bias-in-machine-learning-models.json | 2 +- ...e-learning-with-micropython-in-1-2-3-pydata-global-2024.json | 2 +- ...-predictive-analytics-with-faustream-pydata-global-2024.json | 2 +- ...s-of-data-quality-tackling-common-data-challenges-in-ml.json | 2 +- ...nson-gpu-development-with-python-101-pydata-global-2024.json | 2 +- ...-unix-command-line-and-supercharge-your-pydata-workflow.json | 2 +- ...data-science-matter-in-our-ai-future-pydata-global-2024.json | 2 +- ...es-anomaly-detection-changepoint-detection-segmentation.json | 2 +- ...tive-feature-selection-via-regression-on-shapley-values.json | 2 +- ...e-learning-to-improve-agricultural-resilience-in-africa.json | 2 +- ...ion-related-cows-on-satellite-images-pydata-global-2024.json | 2 +- ...data-pipelines-with-polars-deltalake-pydata-global-2024.json | 2 +- ...ark-with-lance-format-for-multi-modal-ai-data-pipelines.json | 2 +- ...-series-forecasting-are-we-there-yet-pydata-global-2024.json | 2 +- ...in-the-browser-made-simple-by-pycafe-pydata-global-2024.json | 2 +- ...xperience-and-the-impact-of-effort-on-business-outcomes.json | 2 +- ...llm-applications-using-data-prep-kit-pydata-global-2024.json | 2 +- ...ding-the-end-to-end-llm-training-and-inference-pipeline.json | 2 +- 
...g-of-nested-ragged-dataframe-columns-pydata-global-2024.json | 2 +- ...e-data-that-shapes-foundational-llms-pydata-global-2024.json | 2 +- ...ers-new-features-in-apache-spark-4-0-pydata-global-2024.json | 2 +- ...g-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json | 2 +- ...rence-to-features-build-a-core-ml-platform-from-scratch.json | 2 +- ...s-for-polished-visuals-with-plotnine-pydata-global-2024.json | 2 +- ...tion-for-safety-on-the-factory-floor-pydata-global-2024.json | 2 +- ...ualization-using-python-and-cesiumjs-pydata-global-2024.json | 2 +- ...-women-in-ai-data-can-complete-the-future-of-innovation.json | 2 +- ...open-data-and-open-models-for-graphs-pydata-global-2024.json | 2 +- ...ek-leveraging-csp-for-live-inference-pydata-global-2024.json | 2 +- ...ata-science-back-into-llm-evaluation-pydata-global-2024.json | 2 +- ...g-together-graph-and-vector-search-to-empower-retrieval.json | 2 +- ...on-with-bayesian-experimental-design-pydata-global-2024.json | 2 +- ...ation-and-fuzzy-matching-of-large-datasets-using-splink.json | 2 +- ...rrao-understanding-polars-data-types-pydata-global-2024.json | 2 +- ...tarted-with-asynchronous-programming-pydata-global-2024.json | 2 +- ...lm-voice-bots-with-open-source-tools-pydata-global-2024.json | 2 +- ...ntributors-the-evolving-role-of-the-r-development-guide.json | 2 +- ...nses-in-data-ai-strategies-for-scientists-and-engineers.json | 2 +- ...proach-to-designing-pydata-workflows-pydata-global-2024.json | 2 +- ...aries-to-python-scientific-community-pydata-global-2024.json | 2 +- ...velopment-and-deployment-with-kitops-pydata-global-2024.json | 2 +- ...ries-anomaly-detection-in-production-pydata-global-2024.json | 2 +- ...-for-smarter-audience-engagement-and-higher-conversions.json | 2 +- ...-complete-and-robust-preference-data-pydata-global-2024.json | 2 +- ...n-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json | 2 +- ...-the-data-table-package-for-efficient-big-data-analysis.json | 2 +- 
...python-unlocking-efficiency-personalization-and-insight.json | 2 +- ...o-online-machine-learning-with-river-pydata-global-2024.json | 2 +- ...-challenges-and-governance-solutions-pydata-global-2024.json | 2 +- ...ney-retooling-for-a-smaller-data-era-pydata-global-2024.json | 2 +- ...powered-rag-for-enterprise-documents-pydata-global-2024.json | 2 +- ...anguage-models-to-identify-urgent-messages-in-real-time.json | 2 +- 97 files changed, 97 insertions(+), 97 deletions(-) diff --git a/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json index b5ff1780b..e848c5727 100644 --- a/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json +++ b/pydata-global-2024/videos/adarsh-namala-scaling-outside-the-warehouse-using-duckdb-and-python-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nDuckDB is revolutionizing data processing by enabling in-memory OLAP SQL operations with a lightweight, dependency-free architecture. This talk explores how DuckDB can be leveraged to handle large-scale, massively parallel data processing, ranging from hundreds of gigabytes to terabytes, outside traditional SQL and Spark warehouse systems. We will go over the integration with the Python ecosystem and demonstrate its scaling potential using the cloud compute.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "DuckDB is revolutionizing data processing by enabling in-memory OLAP SQL operations with a lightweight, dependency-free architecture. This talk explores how DuckDB can be leveraged to handle large-scale, massively parallel data processing, ranging from hundreds of gigabytes to terabytes, outside traditional SQL and Spark warehouse systems. We will go over the integration with the Python ecosystem and demonstrate its scaling potential using the cloud compute.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1772, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json index f1b60b3ba..0e7dfa625 100644 --- a/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json +++ b/pydata-global-2024/videos/aditi-juneja-understanding-api-dispatching-in-networkx-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nHi! Have you ever wished your pure Python libraries were faster? Or wanted to fundamentally improve a Python library by rewriting everything in a faster language like C or Rust? Well, wish no more... NetworkX's backend dispatching mechanism redirects your plain old NetworkX function calls to a FASTER implementation present in a separate backend package by leveraging the Python's entry_point specification!\n\nNetworkX is a popular, pure Python library used for graph(aka network) analysis. But when the graph size increases (like a network of everyone in the world), then NetworkX algorithms could take days to solve a simple graph analysis problem. So, to address these performance issues, a backend dispatching mechanism was recently developed. In this talk, we will unveil this dispatching mechanism and its implementation details, and how we can use it just by specifying a backend kwarg like this:\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Hi! Have you ever wished your pure Python libraries were faster? Or wanted to fundamentally improve a Python library by rewriting everything in a faster language like C or Rust? Well, wish no more... NetworkX's backend dispatching mechanism redirects your plain old NetworkX function calls to a FASTER implementation present in a separate backend package by leveraging the Python's entry_point specification!\n\nNetworkX is a popular, pure Python library used for graph(aka network) analysis. But when the graph size increases (like a network of everyone in the world), then NetworkX algorithms could take days to solve a simple graph analysis problem. So, to address these performance issues, a backend dispatching mechanism was recently developed. In this talk, we will unveil this dispatching mechanism and its implementation details, and how we can use it just by specifying a backend kwarg like this:\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1746, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json index 4095964ec..419b01703 100644 --- a/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json +++ b/pydata-global-2024/videos/adriana-stan-off-the-shelf-huggingface-models-for-audio-deepfake-detection-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis talk will cover how to use pre-trained HuggingFace models, specifically wav2vec 2.0 and WavLM, to detect audio deepfakes. These deepfakes, made possible by advanced voice cloning tools like ElevenLabs and Respeecher, present risks in areas like misinformation, fraud, and privacy violations. The session will introduce deepfake audio, discuss current trends in voice cloning, and provide a hands-on tutorial for using these transformer-based models to identify synthetic voices by spotting subtle anomalies. Participants will learn how to set up these models, analyze deepfake audio datasets, and assess detection performance, bridging the gap between speech generation and detection technologies.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This talk will cover how to use pre-trained HuggingFace models, specifically wav2vec 2.0 and WavLM, to detect audio deepfakes. These deepfakes, made possible by advanced voice cloning tools like ElevenLabs and Respeecher, present risks in areas like misinformation, fraud, and privacy violations. The session will introduce deepfake audio, discuss current trends in voice cloning, and provide a hands-on tutorial for using these transformer-based models to identify synthetic voices by spotting subtle anomalies. Participants will learn how to set up these models, analyze deepfake audio datasets, and assess detection performance, bridging the gap between speech generation and detection technologies.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1857, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json b/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json index 61f3f481f..f1d9fcfd8 100644 --- a/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json +++ b/pydata-global-2024/videos/ahad-shoaib-foundational-time-series-models-in-practice-the-future-of-forecasting-or-just-hype.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nBeneath the buzz of AI breakthroughs, a quiet revolution is unfolding in the world of forecasting: foundational time series models. These models promise to change the game for operational forecasting, but don\u2019t expect magic. You won\u2019t suddenly become a stock market oracle just by throwing data at them.\n\nIn this talk, we\u2019ll peel back the layers of these new time series models, starting with how they work and how they evolved from transformers. We\u2019ll tackle the big problems of limited data and overhyped algorithms, and explore the real-world challenges that make or break forecasts (hint: human input matters).\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Beneath the buzz of AI breakthroughs, a quiet revolution is unfolding in the world of forecasting: foundational time series models. These models promise to change the game for operational forecasting, but don’t expect magic. You won’t suddenly become a stock market oracle just by throwing data at them.\n\nIn this talk, we’ll peel back the layers of these new time series models, starting with how they work and how they evolved from transformers. We’ll tackle the big problems of limited data and overhyped algorithms, and explore the real-world challenges that make or break forecasts (hint: human input matters).\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1865, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json index c68deb4df..76b03768f 100644 --- a/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json +++ b/pydata-global-2024/videos/akshay-ballal-sonam-pankaj-the-memory-efficient-indexing-for-vector-databases-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nVector databases are everywhere, powering LLMs. But indexing embeddings, especially multivector embeddings like ColPali and Colbert, at a bulk is memory intensive. Vector streaming solves this problem by parallelizing the tasks of parsing, chunking, and embedding generation and indexing it continuously chunk by chunk instead of bulk. This not only increase the speed but also makes the whole task more optimized and memory efficient.\n\nThe library gives many vector database supports, like Pinecone, Weavaite, and Elastic.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Vector databases are everywhere, powering LLMs. But indexing embeddings, especially multivector embeddings like ColPali and Colbert, at a bulk is memory intensive. Vector streaming solves this problem by parallelizing the tasks of parsing, chunking, and embedding generation and indexing it continuously chunk by chunk instead of bulk. This not only increase the speed but also makes the whole task more optimized and memory efficient.\n\nThe library gives many vector database supports, like Pinecone, Weavaite, and Elastic.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1680, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json index a7771f868..3e95fb3ce 100644 --- a/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json +++ b/pydata-global-2024/videos/allen-downey-time-series-analysis-with-statsmodels-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nTime series analysis provides essential tools for modeling and predicting time-dependent data, especially data exhibiting seasonal patterns or serial correlation. This tutorial covers tools in the StatsModels library including seasonal decomposition and ARIMA. We'll develop the ARIMA model bottom-up, implementing it one piece at a time, and then using StatsModels. As examples, we'll look at weather data and electricity generation from renewable sources in the United States since 2004 -- but the methods we'll cover apply to many kinds of real-world time series data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Time series analysis provides essential tools for modeling and predicting time-dependent data, especially data exhibiting seasonal patterns or serial correlation. This tutorial covers tools in the StatsModels library including seasonal decomposition and ARIMA. We'll develop the ARIMA model bottom-up, implementing it one piece at a time, and then using StatsModels. As examples, we'll look at weather data and electricity generation from renewable sources in the United States since 2004 -- but the methods we'll cover apply to many kinds of real-world time series data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5376, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json b/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json index 51e884398..a76d3f19e 100644 --- a/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json +++ b/pydata-global-2024/videos/alonso-silva-building-knowledge-graph-based-agents-with-structured-text-generation.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nKnowledge graphs are excellent at representing and storing heterogeneous and interconnected information in a structured manner, effectively capturing complex relationships and attributes across different data types.\nStructured text generation allows for building knowledge graphs by providing neatly structured outputs, making it an ideal method for extracting structured information.\nSimilarly, structured text generation enables the creation of agents by defining which tools are allowed and what action inputs are permitted.\nIn this talk, we first build a graph database from unstructured data and then we create an agent to query the graph database. We will show these capabilities with a demo.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Knowledge graphs are excellent at representing and storing heterogeneous and interconnected information in a structured manner, effectively capturing complex relationships and attributes across different data types.\nStructured text generation allows for building knowledge graphs by providing neatly structured outputs, making it an ideal method for extracting structured information.\nSimilarly, structured text generation enables the creation of agents by defining which tools are allowed and what action inputs are permitted.\nIn this talk, we first build a graph database from unstructured data and then we create an agent to query the graph database. We will show these capabilities with a demo.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1696, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json index 7510aa742..812105db7 100644 --- a/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json +++ b/pydata-global-2024/videos/andrew-weeks-taking-data-science-in-industry-from-zero-to-production-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nTaking any project from zero to production is challenging. And Data Science has a particularly high failure rate, with a lot of ideas not getting beyond the prototype stage.\n\nBut there are real reasons for this: there is intrinsic and unknown complexity in data, and there are often big challenges knowing if we have actually solved the problem -- the answer is so rarely \"yes\" or \"no\".\n\nIn this talk I'll cover some key learnings from a decade working on DS problems at early- and later-stage startups, building products to improve product market fit.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Taking any project from zero to production is challenging. And Data Science has a particularly high failure rate, with a lot of ideas not getting beyond the prototype stage.\n\nBut there are real reasons for this: there is intrinsic and unknown complexity in data, and there are often big challenges knowing if we have actually solved the problem -- the answer is so rarely \"yes\" or \"no\".\n\nIn this talk I'll cover some key learnings from a decade working on DS problems at early- and later-stage startups, building products to improve product market fit.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1706, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json index 76cfd8413..0fae8c72a 100644 --- a/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json +++ b/pydata-global-2024/videos/anton-antonov-quantile-regression-workflows-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis talk showcases and exemplifies the rapid specification and execution of Quantile Regression workflows. Various use cases are discussed, including fitting, outlier detection, conditional CDFs, and simulations, using different types of time series data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This talk showcases and exemplifies the rapid specification and execution of Quantile Regression workflows. Various use cases are discussed, including fitting, outlier detection, conditional CDFs, and simulations, using different types of time series data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1752, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json index 9457f1baf..1709a2ab4 100644 --- a/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json +++ b/pydata-global-2024/videos/art-anderson-a-deep-dive-into-python-powered-precision-and-scalability-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nLearn how we built a lightning-fast search engine using Python, balancing speed, relevance, and scalability. 
In this session, we\u2019ll explore our hybrid approach, blending vector search with traditional keyword indexing to deliver high quality, accurate results. Discover how we harness a high-performance NoSQL database for efficient data management and fine-tune our results with a re-ranking algorithm for top-notch accuracy.\nWe\u2019ll dive into the hurdles we overcame, like ensuring data consistency in a NoSQL setup, balancing search precision and performance, and designing a scalable architecture. By the end, you\u2019ll understand how this Python-powered engine works, its real-world applications, and the innovative solutions that set it apart.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Learn how we built a lightning-fast search engine using Python, balancing speed, relevance, and scalability. In this session, we’ll explore our hybrid approach, blending vector search with traditional keyword indexing to deliver high quality, accurate results. 
Discover how we harness a high-performance NoSQL database for efficient data management and fine-tune our results with a re-ranking algorithm for top-notch accuracy.\nWe’ll dive into the hurdles we overcame, like ensuring data consistency in a NoSQL setup, balancing search precision and performance, and designing a scalable architecture. By the end, you’ll understand how this Python-powered engine works, its real-world applications, and the innovative solutions that set it apart.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1669, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json index 5295ea865..ef9b2cfbc 100644 --- a/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json +++ b/pydata-global-2024/videos/atin-sanyal-effective-genai-evaluations-mitigate-hallucinations-and-ship-fast-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nRapid adoption of generative AI requires ensuring your application is trustworthy. Careful experimentation and measurement are necessary for this new era of non-deterministic software. In this talk, we will take learnings from 100s of conversations across enterprise AI teams, and discuss how developers can mitigate hallucinations, better inspect their AI systems, and productionize applications with effective guardrails and evaluation checks in place.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Rapid adoption of generative AI requires ensuring your application is trustworthy. Careful experimentation and measurement are necessary for this new era of non-deterministic software. In this talk, we will take learnings from 100s of conversations across enterprise AI teams, and discuss how developers can mitigate hallucinations, better inspect their AI systems, and productionize applications with effective guardrails and evaluation checks in place.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1737, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json index 9ae429840..f789c3d83 100644 --- a/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json +++ b/pydata-global-2024/videos/avik-basu-reproducible-python-projects-using-nix-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nAs data scientists and machine learning engineers, it is crucial that we can reproduce results and seamlessly share projects across teams and stakeholders. However, differing operating systems, Python environments, package versions, and package managers often hinder reproducibility across different machines. This talk will explore how Nix can be leveraged to create reproducible work environments and how it can be a convenient tool for any Data Scientist or ML Engineer.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "As data scientists and machine learning engineers, it is crucial that we can reproduce results and seamlessly share projects across teams and stakeholders. However, differing operating systems, Python environments, package versions, and package managers often hinder reproducibility across different machines. This talk will explore how Nix can be leveraged to create reproducible work environments and how it can be a convenient tool for any Data Scientist or ML Engineer.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1739, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json index dcb60b65b..aecdb473e 100644 --- a/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json +++ b/pydata-global-2024/videos/benjamin-vincent-climbing-the-causal-ladder-for-fun-and-profit-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nIn this talk, we will explore Judea Pearl\u2019s causal ladder (association, intervention, and counterfactuals) through the lens of a simple demand forecasting model. Using real-world business scenarios, I will demonstrate how to move beyond correlation-based predictions to more actionable decisions using PyMC\u2019s causal inference tools. Attendees will learn how to make forecasts for natural business conditions, simulate the effects of strategic changes (like increased advertising spend), and evaluate the causal impact of past price promotion with retrodictive causal inference.\n\nTarget audience: Data scientists, machine learning engineers, and business analysts looking to improve their decision-making using causal inference.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "In this talk, we will explore Judea Pearl’s causal ladder (association, intervention, and counterfactuals) through the lens of a simple demand forecasting model. Using real-world business scenarios, I will demonstrate how to move beyond correlation-based predictions to more actionable decisions using PyMC’s causal inference tools. Attendees will learn how to make forecasts for natural business conditions, simulate the effects of strategic changes (like increased advertising spend), and evaluate the causal impact of past price promotion with retrodictive causal inference.\n\nTarget audience: Data scientists, machine learning engineers, and business analysts looking to improve their decision-making using causal inference.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1798, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json index 7a7a5bdd8..bf812a6c9 100644 --- a/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json +++ b/pydata-global-2024/videos/bill-engels-chris-fonnesbeck-making-gaussian-processes-useful-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThe goal of this tutorial is to make Gaussian processes (GPs) useful. In most practicing data scientists' mental map of modeling and machine learning techniques, Gaussian processes are an advanced approach that sit alone on an island, perhaps with narrow use cases like Bayesian optimization. Most books and other material on GPs tend to focus on theoretical aspects, and it can be hard to close the gap between the theory and putting those ideas into practice to solve real problems in a reasonable amount of time.\n\nThis tutorial is split into two parts. The first part introduces Bayesian modeling, focusing on hierarchical modeling and the concept of partial pooling. We\u2019ll use the classic example of estimating the batting average of a group of baseball players as motivation. Then we\u2019ll introduce GPs as a useful generalization of hierarchical modeling for the common situation where our groups aren\u2019t distinct categories. Instead of thinking of each baseball player as completely distinct and exchangeable entities, we can use a GP to partially pool information locally by also considering each player's age. 
Finally we\u2019ll close the first part by connecting back to the more common introduction to GPs as infinite dimensional multivariate normals.\n\nThe second part of the tutorial will give an overview of practical tips and tricks for modeling with GPs using the open source Python package PyMC. Specifically, how to address the two big issues to using GPs in practice: scaling and identifiability. We\u2019ll discuss useful approximations like the HSGP and when to apply them, advice on when to use splines, and finally when you need to step out of a PPL like PyMC or Stan to a GP specific library like GPFlow or GPyTorch. We\u2019ll do so with a couple motivating examples. The audience should have some familiarity with basic ML and statistics concepts, such as probability distributions, normal and multivariate normal distributions, correlation and covariance, and linear regression - but the talk will aim to be non-technical and the goal will be introduce GPs and give people the tools they need to use them effectively in practice.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "The goal of this tutorial is to make Gaussian processes (GPs) useful. In most practicing data scientists' mental map of modeling and machine learning techniques, Gaussian processes are an advanced approach that sit alone on an island, perhaps with narrow use cases like Bayesian optimization. Most books and other material on GPs tend to focus on theoretical aspects, and it can be hard to close the gap between the theory and putting those ideas into practice to solve real problems in a reasonable amount of time.\n\nThis tutorial is split into two parts. The first part introduces Bayesian modeling, focusing on hierarchical modeling and the concept of partial pooling. We’ll use the classic example of estimating the batting average of a group of baseball players as motivation. Then we’ll introduce GPs as a useful generalization of hierarchical modeling for the common situation where our groups aren’t distinct categories. Instead of thinking of each baseball player as completely distinct and exchangeable entities, we can use a GP to partially pool information locally by also considering each player's age. Finally we’ll close the first part by connecting back to the more common introduction to GPs as infinite dimensional multivariate normals.\n\nThe second part of the tutorial will give an overview of practical tips and tricks for modeling with GPs using the open source Python package PyMC. Specifically, how to address the two big issues to using GPs in practice: scaling and identifiability. We’ll discuss useful approximations like the HSGP and when to apply them, advice on when to use splines, and finally when you need to step out of a PPL like PyMC or Stan to a GP specific library like GPFlow or GPyTorch. We’ll do so with a couple motivating examples. 
The audience should have some familiarity with basic ML and statistics concepts, such as probability distributions, normal and multivariate normal distributions, correlation and covariance, and linear regression - but the talk will aim to be non-technical and the goal will be introduce GPs and give people the tools they need to use them effectively in practice.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5385, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json b/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json index 50cf1df72..0e708424a 100644 --- a/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json +++ b/pydata-global-2024/videos/bing-wang-an-evaluation-of-open-source-ocr-models-for-japanese-medical-documents.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nTo identify a production-ready, open-source OCR model capable of handling sensitive, non-English content with highly technical language, we evaluated the performance of available open-source OCR models in terms of accuracy, memory efficiency, and processing speed. This presentation will share our findings and key insights gained from this research\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "To identify a production-ready, open-source OCR model capable of handling sensitive, non-English content with highly technical language, we evaluated the performance of available open-source OCR models in terms of accuracy, memory efficiency, and processing speed. This presentation will share our findings and key insights gained from this research\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 2197, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json index d348150eb..5d255b249 100644 --- a/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json +++ b/pydata-global-2024/videos/borar-liu-shrivastava-build-your-own-transformer-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nColab Notebook Link: https://colab.research.google.com/drive/1faxDHE3LdAwH7MORdnJei87Q0WF1BhS0?usp=sharing\nMake a copy to your local drive to start working on this notebook.\n\nEver wondered how groundbreaking language models like ChatGPT and Llama were built? The answer lies in transformer, a powerful neural network architecture. In this workshop, we'll dive deep into the inner workings of transformers, with specific focus on self-attention mechanism. We will guide you through the process of building one from scratch. Whether you're a beginner or an experienced practitioner, this workshop is designed to cater to all levels of expertise.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Colab Notebook Link: https://colab.research.google.com/drive/1faxDHE3LdAwH7MORdnJei87Q0WF1BhS0?usp=sharing\nMake a copy to your local drive to start working on this notebook.\n\nEver wondered how groundbreaking language models like ChatGPT and Llama were built? The answer lies in transformer, a powerful neural network architecture. In this workshop, we'll dive deep into the inner workings of transformers, with specific focus on self-attention mechanism. We will guide you through the process of building one from scratch. Whether you're a beginner or an experienced practitioner, this workshop is designed to cater to all levels of expertise.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5337, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json b/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json index 624d83183..9090991ea 100644 --- a/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json +++ b/pydata-global-2024/videos/bowne-anderson-nichol-petraityte-building-an-ai-travel-agent-that-never-hallucinates.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nLMs offer powerful capabilities, but deploying them effectively in production remains a challenge for conversational AI and Chatbot applications, especially when it comes to minimizing hallucinations and ensuring accurate responses. In this 90-minute hands-on tutorial, we\u2019ll explore building conversational AI systems using CALM and Rasa. CALM (Conversational AI Language Model) combines traditional conversational AI techniques with LLMs, separating conversational ability from business logic execution to deliver reliable, cost efficient, and scalable solutions. Unlike LLMs that handle both sides of the conversation, CALM focuses on user understanding with predefined business logic. This approach not only accelerates development but also enhances cost efficiency, scalability and reliability. By focusing on predefined business logic with CALM, you\u2019ll gain the ability to build sophisticated, scalable systems faster. You\u2019ll also learn how to use fine-tuned, open-weight models, such as llama 8b to power your AI assistant.\n\nParticipants will learn how to use CALM for business logic and Rasa for dialogue management, with practical insights, code examples, and best practices. 
Materials will be provided via a GitHub repository with a GitHub Codespace for easy access and execution.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "LMs offer powerful capabilities, but deploying them effectively in production remains a challenge for conversational AI and Chatbot applications, especially when it comes to minimizing hallucinations and ensuring accurate responses. In this 90-minute hands-on tutorial, we’ll explore building conversational AI systems using CALM and Rasa. CALM (Conversational AI Language Model) combines traditional conversational AI techniques with LLMs, separating conversational ability from business logic execution to deliver reliable, cost efficient, and scalable solutions. Unlike LLMs that handle both sides of the conversation, CALM focuses on user understanding with predefined business logic. This approach not only accelerates development but also enhances cost efficiency, scalability and reliability. 
By focusing on predefined business logic with CALM, you’ll gain the ability to build sophisticated, scalable systems faster. You’ll also learn how to use fine-tuned, open-weight models, such as llama 8b to power your AI assistant.\n\nParticipants will learn how to use CALM for business logic and Rasa for dialogue management, with practical insights, code examples, and best practices. Materials will be provided via a GitHub repository with a GitHub Codespace for easy access and execution.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 4953, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json index e3911eceb..690b4e130 100644 --- a/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json +++ b/pydata-global-2024/videos/brookes-horne-dashboards-to-aid-british-government-decisions-using-r-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nIn partnership with the Department for Environment, Food and Rural Affairs (DEFRA), Datacove developed a bespoke Shiny dashboard designed to enhance decision-making in the areas of Health and Wellbeing, Nature, and Sustainability (HWNS). This presentation explores three key aspects: project and data management, customisation, and usability enhancements in R.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "In partnership with the Department for Environment, Food and Rural Affairs (DEFRA), Datacove developed a bespoke Shiny dashboard designed to enhance decision-making in the areas of Health and Wellbeing, Nature, and Sustainability (HWNS). This presentation explores three key aspects: project and data management, customisation, and usability enhancements in R.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1659, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json index f499783fd..badd39168 100644 --- a/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json +++ b/pydata-global-2024/videos/brunelle-kornacker-hands-on-multimodal-ai-development-with-pixeltable-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis tutorial introduces Pixeltable, which provides data-centric AI infrastructure with a declarative, incremental approach for multimodal workloads. Participants will learn to manage multimodal data (text, images, video) using Pixeltable's declarative interface. We'll cover data versioning, indexing, and orchestration through computed columns and iterators. Attendees will gain practical experience with Pixeltable's integration capabilities and custom UDFs.\n\nRequirements: Python knowledge, basic ML concepts. Materials will be available via a GitHub repository and Google Colab notebooks.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This tutorial introduces Pixeltable, which provides data-centric AI infrastructure with a declarative, incremental approach for multimodal workloads. Participants will learn to manage multimodal data (text, images, video) using Pixeltable's declarative interface. We'll cover data versioning, indexing, and orchestration through computed columns and iterators. Attendees will gain practical experience with Pixeltable's integration capabilities and custom UDFs.\n\nRequirements: Python knowledge, basic ML concepts. Materials will be available via a GitHub repository and Google Colab notebooks.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1513, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json b/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json index 740f10e7c..3b1ff6eba 100644 --- a/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json +++ b/pydata-global-2024/videos/caina-max-couto-da-silva-pytorch-workflow-mastery-a-guide-to-track-and-optimize-model-performance.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis tutorial empowers deep learning practitioners to master the entire PyTorch workflow, from efficient model creation to advanced tracking and optimization techniques. We'll begin by exploring a practical PyTorch workflow, then delve into integrating popular experiment tracking tools like MLFlow and Weights & Biases. You'll learn to log custom metrics, artifacts, and interactive visualizations, enhancing your model development process. Finally, we'll tackle hyperparameter optimization using Optuna's Bayesian search, all while maintaining meticulous experiment tracking for easy comparison and reproducibility.\n\nBy the end of the session, you'll have constructed a robust, modular pipeline for managing experiments and optimizing model performance. Whether you're new to PyTorch or an experienced data scientist looking to improve your workflow, this hands-on tutorial offers immediately applicable insights and techniques to enhance your deep learning projects across diverse domains.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This tutorial empowers deep learning practitioners to master the entire PyTorch workflow, from efficient model creation to advanced tracking and optimization techniques. We'll begin by exploring a practical PyTorch workflow, then delve into integrating popular experiment tracking tools like MLFlow and Weights & Biases. You'll learn to log custom metrics, artifacts, and interactive visualizations, enhancing your model development process. Finally, we'll tackle hyperparameter optimization using Optuna's Bayesian search, all while maintaining meticulous experiment tracking for easy comparison and reproducibility.\n\nBy the end of the session, you'll have constructed a robust, modular pipeline for managing experiments and optimizing model performance. 
Whether you're new to PyTorch or an experienced data scientist looking to improve your workflow, this hands-on tutorial offers immediately applicable insights and techniques to enhance your deep learning projects across diverse domains.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5443, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json index 218d8b022..67653ed48 100644 --- a/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json +++ b/pydata-global-2024/videos/chris-laffra-pyscript-writing-a-python-application-in-the-browser-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nLearn how to write a native Python application in the browser using WebAssembly enabled by PyScript.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Learn how to write a native Python application in the browser using WebAssembly enabled by PyScript.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5925, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json index e8fb17a78..2eca347f4 100644 --- a/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json +++ b/pydata-global-2024/videos/chris-rackauckas-open-source-component-based-modeling-with-modelingtoolkit-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nComponent-based modeling systems such as Simulink and Dymola allow for building scientific models in a way that can be composed. For example, Bob can build a model of an engine, and Alice can build a model of a drive shaft, and you can then connect the two models and have a model of a car. These kinds of tools are used all throughout industrial modeling and simulation in order to allow for \"separation of concerns\", allowing experts to engineer their domain and compose the final digital twins with reusable scientific modules. But what about open source? In this talk we will introduce ModelingToolkit, an open source component-based modeling framework that allows for composing pre-built models and scales to large high-fidelity digital twins.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Component-based modeling systems such as Simulink and Dymola allow for building scientific models in a way that can be composed. For example, Bob can build a model of an engine, and Alice can build a model of a drive shaft, and you can then connect the two models and have a model of a car. These kinds of tools are used all throughout industrial modeling and simulation in order to allow for \"separation of concerns\", allowing experts to engineer their domain and compose the final digital twins with reusable scientific modules. But what about open source? In this talk we will introduce ModelingToolkit, an open source component-based modeling framework that allows for composing pre-built models and scales to large high-fidelity digital twins.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1643, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json b/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json index c3e568abe..dff0fab77 100644 --- a/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json +++ b/pydata-global-2024/videos/cordier-jawad-laurent-boosting-ai-reliability-uncertainty-quantification-with-mapie.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nMAPIE (Model Agnostic Prediction Interval Estimator) is your go-to solution for managing uncertainties and risks in machine learning models. This Python library, nestled within scikit-learn-contrib, offers a way to calculate prediction sets with controlled coverage rates for regression and classification tasks.\n\nBut it doesn't stop there - MAPIE can also be used to handle more complex tasks like time series analysis, multi-label classification, computer vision and natural language processing, ensuring probabilistic guarantees on crucial metrics.\n\nJoin us as we delve into the world of conformal predictions and how to quickly manage your uncertainties using MAPIE.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "MAPIE (Model Agnostic Prediction Interval Estimator) is your go-to solution for managing uncertainties and risks in machine learning models. This Python library, nestled within scikit-learn-contrib, offers a way to calculate prediction sets with controlled coverage rates for regression and classification tasks.\n\nBut it doesn't stop there - MAPIE can also be used to handle more complex tasks like time series analysis, multi-label classification, computer vision and natural language processing, ensuring probabilistic guarantees on crucial metrics.\n\nJoin us as we delve into the world of conformal predictions and how to quickly manage your uncertainties using MAPIE.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5056, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json index fa5dc4cd9..3d82b4f1b 100644 --- a/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json +++ b/pydata-global-2024/videos/daniel-chen-tips-to-level-up-your-shiny-for-python-applications-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nShiny for Python is an efficient and reactive application framework that will be able to grow with your application needs. As your shiny application grows, you may find yourself needing more custom behaviors and potentially reusing and sharing your custom behaviors with others. \nYou may also find your existing applications to be overly complex and had to see the overall structure of the application. Here are some tips on writing better Shiny Applications and leveling up your code.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Shiny for Python is an efficient and reactive application framework that will be able to grow with your application needs. As your shiny application grows, you may find yourself needing more custom behaviors and potentially reusing and sharing your custom behaviors with others. \nYou may also find your existing applications to be overly complex and had to see the overall structure of the application. Here are some tips on writing better Shiny Applications and leveling up your code.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1841, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json index 5e46412f0..65fcbbd52 100644 --- a/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json +++ b/pydata-global-2024/videos/daniel-molina-discover-the-julia-machine-learning-ecosystem-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nJulia is a high-performance language for technical computing that offers advantages like type stability, just-in-time compilation, and extensive parallel computing support. Its Machine Learning ecosystem, although having fewer options, is functional and includes packages like DataFrames.jl, Flux.jl, MLJ.jl, and SciML for various ML tasks. Additional tools cover data visualization, R compatibility, and specific ML applications. The ecosystem is comprehensive and can meet many ML researcher/professional needs. This talk provides an overview of the ecosystem, discussing both its strengths and potential areas for improvement.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Julia is a high-performance language for technical computing that offers advantages like type stability, just-in-time compilation, and extensive parallel computing support. Its Machine Learning ecosystem, although having fewer options, is functional and includes packages like DataFrames.jl, Flux.jl, MLJ.jl, and SciML for various ML tasks. Additional tools cover data visualization, R compatibility, and specific ML applications. The ecosystem is comprehensive and can meet many ML researcher/professional needs. This talk provides an overview of the ecosystem, discussing both its strengths and potential areas for improvement.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1672, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json b/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json index cfac95fe5..8f2813ff2 100644 --- a/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json +++ b/pydata-global-2024/videos/daphne-grasselly-enabling-multi-language-programming-in-data-engineering-workflows.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nStreamlining clinical trial output workflows is a key challenge in clinical studies. To deliver reports to health authorities, clinical trial statisticians need to create several scripts to produce deliverables such as output datasets, tables, figures, and listings. Statisticians must also handle specific execution orders to respect dependencies between the generated datasets.\n\nOur project leverages Python programming to automatically generate orchestration workflows from clinical trial project metadata using the Snakemake framework. Snakemake supports the execution of multiple jobs using Docker containers, facilitating multilingual orchestration. This enables our users to run end-to-end (E2E) data engineering workflows using their preferred programming languages, primarily SAS and R. Moreover, Snakemake allows parallel runs for efficient workflow management.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Streamlining clinical trial output workflows is a key challenge in clinical studies. To deliver reports to health authorities, clinical trial statisticians need to create several scripts to produce deliverables such as output datasets, tables, figures, and listings. Statisticians must also handle specific execution orders to respect dependencies between the generated datasets.\n\nOur project leverages Python programming to automatically generate orchestration workflows from clinical trial project metadata using the Snakemake framework. Snakemake supports the execution of multiple jobs using Docker containers, facilitating multilingual orchestration. This enables our users to run end-to-end (E2E) data engineering workflows using their preferred programming languages, primarily SAS and R. Moreover, Snakemake allows parallel runs for efficient workflow management.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1793, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json index 54d1caff2..a0c2e3106 100644 --- a/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json +++ b/pydata-global-2024/videos/duarte-carmo-panel-the-dashboard-that-grew-a-scaling-saga-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis talk will tell the tale of how we migrated a data application from Streamlit to Panel. And what it took to scale from 100 users to 2000+ users in less than 2 months. It's a story of pain, Kubernetes, resilience, and a whole lot of Python\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This talk will tell the tale of how we migrated a data application from Streamlit to Panel. And what it took to scale from 100 users to 2000+ users in less than 2 months. It's a story of pain, Kubernetes, resilience, and a whole lot of Python\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1817, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json b/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json index cfddcb992..6bb9651f3 100644 --- a/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json +++ b/pydata-global-2024/videos/el-mawass-neeman-evaluating-rags-on-the-correctness-and-coherence-of-open-source-eval-metrics.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nRetrieval-Augmented Generation (RAG), despite being a superstar of GenAI over the last year, comes with a plethora of challenges and is prone to errors. Open Source Python libraries like RAGAS and TruLens provide frameworks for evaluating RAG systems, using various metrics that leverage LLMs to assess performance. But when using LLM in a RAG system is in itself a source of errors, it remains to be seen how reliable it would be to use another LLM, allthebit a more powerful one, as a judge of the RAG performance. This study explores various RAG evaluation metrics, as well as the choice of evaluator LLM, to examine the reliability and consistency of LLM-based evaluations. The aim is to provide practical insights and guidance for interpreting these evaluations effectively, and help users make informed decisions when applying them in diverse contexts.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Retrieval-Augmented Generation (RAG), despite being a superstar of GenAI over the last year, comes with a plethora of challenges and is prone to errors. Open Source Python libraries like RAGAS and TruLens provide frameworks for evaluating RAG systems, using various metrics that leverage LLMs to assess performance. But when using LLM in a RAG system is in itself a source of errors, it remains to be seen how reliable it would be to use another LLM, allthebit a more powerful one, as a judge of the RAG performance. This study explores various RAG evaluation metrics, as well as the choice of evaluator LLM, to examine the reliability and consistency of LLM-based evaluations. The aim is to provide practical insights and guidance for interpreting these evaluations effectively, and help users make informed decisions when applying them in diverse contexts.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1807, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json index 4e803e97d..e31c763fa 100644 --- a/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json +++ b/pydata-global-2024/videos/elijah-ben-izzy-build-production-ready-ai-agents-with-burr-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nIn this talk we present the OS library Burr -- a tool that makes it easier to build reliable, production-ready AI applications and agents. We will show how to use Burr to address a host of production concerns problems including generating test data from prior runs, interactive debugging, persisting/loading application state, and more\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "In this talk we present the OS library Burr -- a tool that makes it easier to build reliable, production-ready AI applications and agents. We will show how to use Burr to address a host of production concerns problems including generating test data from prior runs, interactive debugging, persisting/loading application state, and more\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1648, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json index 9c58c2e8f..b0c23197a 100644 --- a/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json +++ b/pydata-global-2024/videos/evan-wimpey-python-is-a-joke-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nEnjoy some data-driven laughs with Evan Wimpey, a data and analytics comedian (and we're not just talking about his coding skills). No data topic is off-limits, so come enjoy some of the funniest jokes ever told at a data conference.\n\nNote the baseline\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", + "description": "Enjoy some data-driven laughs with Evan Wimpey, a data and analytics comedian (and we're not just talking about his coding skills). 
No data topic is off-limits, so come enjoy some of the funniest jokes ever told at a data conference.\n\nNote the baseline", "duration": 1699, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json index 8428c9ebc..299378dfc 100644 --- a/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json +++ b/pydata-global-2024/videos/eyal-gruss-let-our-optima-combine-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nAn introduction to solving combinatorial optimization and constraint satisfaction problems in Python. I will review the most popular libraries for SAT/CSP. We will then deep dive to a crash corse on using Google's award winning OR-tools library, for efficiently solving some non-trivial real-world constrained combinatorial optimization problems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "An introduction to solving combinatorial optimization and constraint satisfaction problems in Python. I will review the most popular libraries for SAT/CSP. We will then deep dive to a crash corse on using Google's award winning OR-tools library, for efficiently solving some non-trivial real-world constrained combinatorial optimization problems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1741, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json index 24450cfbe..3367b40bf 100644 --- a/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json +++ b/pydata-global-2024/videos/eyal-kazin-causality-mental-hygiene-for-data-science-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nTo apply or not to apply, that is the question.\n\nCausal reasoning elevates predictive outcomes by shifting from \u201cwhat happened\u201d to \u201cwhat would happen if\u201d. 
Yet, implementing causality can be challenging or even infeasible in some contexts. This talk explores how the very act of assessing its applicability can add value to your projects. Through a gentle introduction to causal inference tools and practical use cases, you will learn how to bring greater scientific rigour to real-world problems.\n\nTarget audience: Practicing and aspiring data scientists, machine learning engineers, and analysts looking to improve their decision-making with causal inference.\n\nNo prior knowledge is assumed.\n\nFor the seasoned practitioners I hope to shine light on aspects that may not have been considered. \ud83d\udca1\n\nCan't make the talk? Read all about it in my new TDS article: \ud83e\udde0\ud83e\uddf9 Causality \u2014 Mental Hygiene for Data Science (http://bit.ly/causal-hygiene)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "To apply or not to apply, that is the question.\n\nCausal reasoning elevates predictive outcomes by shifting from “what happened” to “what would happen if”. Yet, implementing causality can be challenging or even infeasible in some contexts. This talk explores how the very act of assessing its applicability can add value to your projects. Through a gentle introduction to causal inference tools and practical use cases, you will learn how to bring greater scientific rigour to real-world problems.\n\nTarget audience: Practicing and aspiring data scientists, machine learning engineers, and analysts looking to improve their decision-making with causal inference.\n\nNo prior knowledge is assumed.\n\nFor the seasoned practitioners I hope to shine light on aspects that may not have been considered. 💡\n\nCan't make the talk? Read all about it in my new TDS article: 🧠🧹 Causality — Mental Hygiene for Data Science (http://bit.ly/causal-hygiene)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1756, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json index a3386548f..0e3ed367f 100644 --- a/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json +++ b/pydata-global-2024/videos/francesc-alted-mastering-large-ndarray-handling-with-blosc2-and-caterva2-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nAs data grows larger and more complex, efficient storage and processing become critical to achieving scalable and high-performance computing. Blosc2 (https://www.blosc.org), a powerful meta-compressor library, addresses these challenges by enabling rapid compression and decompression of large, multidimensional arrays (NDArrays). 
This tutorial will introduce the core concepts of working with Blosc2, focusing on how it can be leveraged to optimize both storage and computational performance in Python.\n\nAttendees will learn how to:\n\n Efficiently create and manage large NDArrays, including options for persistence.\n Select the best codecs and filters for specific data types and workflows to achieve optimal compression ratios and performance.\n Perform computations directly on compressed data to save memory and speed up processing.\n Seamlessly share NDArrays using Caterva2, a versatile library designed to enable remote sharing and serving of multidimensional datasets.\n\nThis tutorial is ideal for Python developers working with large-scale data in scientific computing, machine learning, and other data-intensive fields.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "As data grows larger and more complex, efficient storage and processing become critical to achieving scalable and high-performance computing. 
Blosc2 (https://www.blosc.org), a powerful meta-compressor library, addresses these challenges by enabling rapid compression and decompression of large, multidimensional arrays (NDArrays). This tutorial will introduce the core concepts of working with Blosc2, focusing on how it can be leveraged to optimize both storage and computational performance in Python.\n\nAttendees will learn how to:\n\n Efficiently create and manage large NDArrays, including options for persistence.\n Select the best codecs and filters for specific data types and workflows to achieve optimal compression ratios and performance.\n Perform computations directly on compressed data to save memory and speed up processing.\n Seamlessly share NDArrays using Caterva2, a versatile library designed to enable remote sharing and serving of multidimensional datasets.\n\nThis tutorial is ideal for Python developers working with large-scale data in scientific computing, machine learning, and other data-intensive fields.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5220, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json b/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json index bc82a9a1c..6841451dd 100644 --- a/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json +++ b/pydata-global-2024/videos/francesco-conti-deep-learning-in-energy-management-non-intrusive-load-monitoring-for-iot-devices.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nNon-Intrusive Load Monitoring (NILM) is a key technique in data-driven energy management and home automation, aimed at disaggregating energy consumption to identify active appliances in households and quantify their energy usage. This presentation:\n\n Provides an overview of NILM, highlighting its advantages and reviewing state-of-the-art deep learning algorithms developed for this purpose.\n Examines smart meters and IoT devices in energy systems, with a focus on the Chain2 protocol used in Italian energy systems. This event-based protocol generates low-volume data, enabling real-time energy monitoring and alerting.\n Presents examples of deep learning models trained on real-world IoT sensor data from energy meters, demonstrating their application in energy disaggregation.\n\nThis session offers an insightful overview of real-world deep learning applications in energy systems. While tailored for data scientists and data engineers interested in these fields, no prior knowledge is required. 
Join to explore how these technologies are driving energy optimization, cost reduction, and enhancing personal energy consumption awareness.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Non-Intrusive Load Monitoring (NILM) is a key technique in data-driven energy management and home automation, aimed at disaggregating energy consumption to identify active appliances in households and quantify their energy usage. This presentation:\n\n Provides an overview of NILM, highlighting its advantages and reviewing state-of-the-art deep learning algorithms developed for this purpose.\n Examines smart meters and IoT devices in energy systems, with a focus on the Chain2 protocol used in Italian energy systems. 
This event-based protocol generates low-volume data, enabling real-time energy monitoring and alerting.\n Presents examples of deep learning models trained on real-world IoT sensor data from energy meters, demonstrating their application in energy disaggregation.\n\nThis session offers an insightful overview of real-world deep learning applications in energy systems. While tailored for data scientists and data engineers interested in these fields, no prior knowledge is required. Join to explore how these technologies are driving energy optimization, cost reduction, and enhancing personal energy consumption awareness.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1796, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json index 742131834..939d3573e 100644 --- a/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json +++ b/pydata-global-2024/videos/guillaume-dalle-automatic-differentiation-a-tale-of-two-languages-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis talk is an introduction to automatic differentiation with a focus on the Python and Julia ecosystems. We will first explain what autodiff is and how it works, then describe its various implementations in both languages. Our goal is to give everyone a good understanding of how computer code can be differentiated, while also discussing the trade-offs this differentiability entails.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This talk is an introduction to automatic differentiation with a focus on the Python and Julia ecosystems. We will first explain what autodiff is and how it works, then describe its various implementations in both languages. Our goal is to give everyone a good understanding of how computer code can be differentiated, while also discussing the trade-offs this differentiability entails.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1727, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json index 38810e3e0..50bbdb128 100644 --- a/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json +++ b/pydata-global-2024/videos/hannes-muhleisen-changing-data-with-confidence-using-duckdb-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nChanging data is hard: The computer may crash, scripts could fail, and data structures could be changing. Relational data management systems provide transactional (\u201cACID\u201d) guarantees that can be immensely useful for data analysis. DuckDB provides all-or-nothing semantics for changes to datasets and is robust against failures of any kind. In this talk, we will illustrate the usefulness DuckDB\u2019s transactional facilities to bring sanity to changes to data analysis workflows in Python.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Changing data is hard: The computer may crash, scripts could fail, and data structures could be changing. Relational data management systems provide transactional (“ACID”) guarantees that can be immensely useful for data analysis. DuckDB provides all-or-nothing semantics for changes to datasets and is robust against failures of any kind. In this talk, we will illustrate the usefulness DuckDB’s transactional facilities to bring sanity to changes to data analysis workflows in Python.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1823, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json b/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json index 876707501..6c426b4ae 100644 --- a/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json +++ b/pydata-global-2024/videos/hansila-sudasinghe-pydata-bloom-framework-an-approach-to-data-science-in-university-education.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis proposal aims to develop a Python curriculum for data science for multidisciplinary studies in university education. Data Science is nowadays a trending topic in any area like social science, finance, natural science and so many others. Therefore, every student in the university education is keen to learn data science using computer languages rather than using SPSS or other traditional data analysis tools especially related to research. So, this aims to develop a new curriculum for any student studying from any discipline in higher education to learn data science using trending techniques and tools. Python is the core programming language here because it is very widely used and related to data science field. Plus, it has many advantages like easy to learn and use, platform independence used, large and active community support. Utilizing Bloom\u2019s Taxonomy as the guiding framework has developed a new curriculum for four-year degree programs to succeed in data driven world considering multidisciplinary approach. 
In this curriculum, students can start from Python basic programming concepts to progress to advanced analyzing techniques using libraries like Pandas, NumPy, and Seaborn, and platforms such as Anaconda and Google Colab and finally build own projects in that students related discipline. Ultimately this curriculum will leverage success in Data-centric society in domain specific applications.\n\nKeywords: Bloom\u2019s, curriculum, multidisciplinary, python, science, taxonomy\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This proposal aims to develop a Python curriculum for data science for multidisciplinary studies in university education. Data Science is nowadays a trending topic in any area like social science, finance, natural science and so many others. Therefore, every student in the university education is keen to learn data science using computer languages rather than using SPSS or other traditional data analysis tools especially related to research. 
So, this aims to develop a new curriculum for any student studying from any discipline in higher education to learn data science using trending techniques and tools. Python is the core programming language here because it is very widely used and related to data science field. Plus, it has many advantages like easy to learn and use, platform independence used, large and active community support. Utilizing Bloom’s Taxonomy as the guiding framework has developed a new curriculum for four-year degree programs to succeed in data driven world considering multidisciplinary approach. In this curriculum, students can start from Python basic programming concepts to progress to advanced analyzing techniques using libraries like Pandas, NumPy, and Seaborn, and platforms such as Anaconda and Google Colab and finally build own projects in that students related discipline. Ultimately this curriculum will leverage success in Data-centric society in domain specific applications.\n\nKeywords: Bloom’s, curriculum, multidisciplinary, python, science, taxonomy\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1756, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json index 194942a17..617b7da69 100644 --- a/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json +++ b/pydata-global-2024/videos/hendrik-makait-dask-xarray-geoscience-at-massive-scale-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nDoing geoscience is hard. It\u2019s even harder if you have to figure out how to handle large amounts of data!\n\nXarray is an open-source Python library designed to simplify the handling of labeled multi-dimensional arrays, like raster geospatial data, making it a favorite among geoscientists. It allows these scientists to easily express their computations, and is backed by Dask, a Python library for parallel and distributed computing, to scale computations to entire clusters of machines.\n\nPeople love using Xarray on Dask for geospatial workloads, but only up to about the terabyte scale. At this point, the stack can struggle, requiring expertise to work well and frustrating users and developers alike.\n\nTo address this and enable the Dask \u2764\ufe0f Xarray stack to scale to hundreds of terabytes, we have recently designed a suite of large-scale geospatial benchmarks. 
With the help of these benchmarks, we are able to understand what limits performance within Dask and Xarray, and to address these issues.\nIn this talk, we will explore how Dask integrates with libraries like Xarray and Zarr to scale geospatial workloads and other multi-dimensional array computations.\n\nWe will also dive deeper into some of the bottlenecks in the Dask \u2764\ufe0f Xarray stack that our benchmarks revealed, as well as some of the recent improvements we have made in these areas. With the help of our benchmark suite, we then assess the impact of these changes.\n\nJoin us to discover how Dask helps you scale geoscience workloads from your laptop to the cloud.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Doing geoscience is hard. 
It’s even harder if you have to figure out how to handle large amounts of data!\n\nXarray is an open-source Python library designed to simplify the handling of labeled multi-dimensional arrays, like raster geospatial data, making it a favorite among geoscientists. It allows these scientists to easily express their computations, and is backed by Dask, a Python library for parallel and distributed computing, to scale computations to entire clusters of machines.\n\nPeople love using Xarray on Dask for geospatial workloads, but only up to about the terabyte scale. At this point, the stack can struggle, requiring expertise to work well and frustrating users and developers alike.\n\nTo address this and enable the Dask ❤️ Xarray stack to scale to hundreds of terabytes, we have recently designed a suite of large-scale geospatial benchmarks. With the help of these benchmarks, we are able to understand what limits performance within Dask and Xarray, and to address these issues.\nIn this talk, we will explore how Dask integrates with libraries like Xarray and Zarr to scale geospatial workloads and other multi-dimensional array computations.\n\nWe will also dive deeper into some of the bottlenecks in the Dask ❤️ Xarray stack that our benchmarks revealed, as well as some of the recent improvements we have made in these areas. With the help of our benchmark suite, we then assess the impact of these changes.\n\nJoin us to discover how Dask helps you scale geoscience workloads from your laptop to the cloud.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1771, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json index 1d1601d94..2bf2585b4 100644 --- a/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json +++ b/pydata-global-2024/videos/hsia-swena-williams-python-bigquery-dataframes-hands-on-with-scalable-serverless-analysis.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nA beginner level hands-on introduction to BigQuery DataFrames. Please bring your laptop! There is nothing to install in advance\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "A beginner level hands-on introduction to BigQuery DataFrames. Please bring your laptop! There is nothing to install in advance\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5325, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json index 173b5de53..6929256a6 100644 --- a/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json +++ b/pydata-global-2024/videos/ian-ozsvald-valuable-llm-lessons-learnt-on-kaggle-s-arc-agi-challenge-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nHaving worked on Kaggle's LLM-based ARC AGI program-writing challenge for 6 months using Llama3, I'll give reflections on the lessons learned making an automatic program generator, evaluating it, coming up with strong representations for the 
challenge, chain-of-thought and program-of-thought styles and some multi-stage critical thinking approaches. You'll get tips for tuning your own prompts and shortcuts to help you evaluate your own LLM usage with greater assurance in the face of non-deterministic outcomes.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Having worked on Kaggle's LLM-based ARC AGI program-writing challenge for 6 months using Llama3, I'll give reflections on the lessons learned making an automatic program generator, evaluating it, coming up with strong representations for the challenge, chain-of-thought and program-of-thought styles and some multi-stage critical thinking approaches. You'll get tips for tuning your own prompts and shortcuts to help you evaluate your own LLM usage with greater assurance in the face of non-deterministic outcomes.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1742, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json index 767aa9c64..7f1c52d9f 100644 --- a/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json +++ b/pydata-global-2024/videos/irina-vidal-migallon-trustworthy-llms-vibe-checks-are-not-all-you-need-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\n9 out of 10 engineers will recommend the use of evaluation tools for their LLMs, but admit they only trust eyeballing responses to decide whether it's safe to use. The 10th carefully studies the floor in silence.\n\nThis talk is for engineers, developers or applied researchers who may or may not know of evaluation tools and metrics, but either way benefit from an overview of different risks in applications using LLMs for text generation, Open Source libraries they can use to mitigate these risks, and examples of how to use them.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "9 out of 10 engineers will recommend the use of evaluation tools for their LLMs, but admit they only trust eyeballing responses to decide whether it's safe to use. The 10th carefully studies the floor in silence.\n\nThis talk is for engineers, developers or applied researchers who may or may not know of evaluation tools and metrics, but either way benefit from an overview of different risks in applications using LLMs for text generation, Open Source libraries they can use to mitigate these risks, and examples of how to use them.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1660, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json b/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json index 414b4f70a..a59432c51 100644 --- a/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json +++ b/pydata-global-2024/videos/jacob-tomlinson-melody-wang-the-art-of-wrangling-your-gpu-python-environments-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nDebugging software itself is a hard task, but debugging GPU software environments can be even more challenging. Understanding the intricate interactions between hardware, drivers, CUDA, C++ dependencies, and Python libraries can be far more complex.\n\nIn this talk we will dig into how these different layers interact and how you can address some of the common pitfalls that folks run into when configuring GPU Python environments. We will also introduce a new tool, RAPIDS Doctor, that aims to take the challenge out of ensuring your software environments are in good shape. RAPIDS Doctor checks and diagnoses environmental health issues straight from the command line, ensuring that your setup is fully functional and optimized for performance.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Debugging software itself is a hard task, but debugging GPU software environments can be even more challenging. Understanding the intricate interactions between hardware, drivers, CUDA, C++ dependencies, and Python libraries can be far more complex.\n\nIn this talk we will dig into how these different layers interact and how you can address some of the common pitfalls that folks run into when configuring GPU Python environments. We will also introduce a new tool, RAPIDS Doctor, that aims to take the challenge out of ensuring your software environments are in good shape. RAPIDS Doctor checks and diagnoses environmental health issues straight from the command line, ensuring that your setup is fully functional and optimized for performance.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1645, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json index 990cedb8a..f174e796f 100644 --- a/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json +++ b/pydata-global-2024/videos/jeff-bezanson-statically-compiled-julia-for-library-development-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nDue to its high-level syntax and powerful interactive prompt, Julia is typically used as a computational front-end language. However there is growing interest in using Julia to develop statically-compiled libraries to be called from other languages (Python, C++, etc.). I will present recent and ongoing work happening in the Julia community to enable this use case, including building smaller binaries and static analysis tooling.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Due to its high-level syntax and powerful interactive prompt, Julia is typically used as a computational front-end language. However there is growing interest in using Julia to develop statically-compiled libraries to be called from other languages (Python, C++, etc.). I will present recent and ongoing work happening in the Julia community to enable this use case, including building smaller binaries and static analysis tooling.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1843, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json b/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json index e774a0e09..d945deeb3 100644 --- a/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json +++ b/pydata-global-2024/videos/jhaveri-joshi-holistic-evaluation-of-large-language-models-from-references-to-human-judgment.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nIn the rapidly evolving field of natural language processing, the evaluation of large language models (LLMs) is crucial for understanding their performance and guiding their development. This talk delves into the two primary evaluation methodologies: reference-based and reference less techniques.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "In the rapidly evolving field of natural language processing, the evaluation of large language models (LLMs) is crucial for understanding their performance and guiding their development. This talk delves into the two primary evaluation methodologies: reference-based and reference less techniques.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1933, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json index ba79fec67..d4f7977eb 100644 --- a/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json +++ b/pydata-global-2024/videos/john-mount-solving-forecasting-problems-in-r-and-python-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis talk will explain how to solve business forecasting problems using time series methods. 
Time series forecasting remains a specialty topic. Because of this you really want to use a package tuned for your use case and specialized to deal with the difficulties inherent in time series forecasting. I will share a simplified problem notation that helps you select between time series packages in R and Python\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This talk will explain how to solve business forecasting problems using time series methods. Time series forecasting remains a specialty topic. Because of this you really want to use a package tuned for your use case and specialized to deal with the difficulties inherent in time series forecasting. I will share a simplified problem notation that helps you select between time series packages in R and Python\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1740, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json b/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json index a43b9f624..3e6fbf868 100644 --- a/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json +++ b/pydata-global-2024/videos/john-sandall-fairness-tales-how-to-measure-and-mitigate-unfair-bias-in-machine-learning-models.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nIn this 90-minute workshop, machine learning engineers and data scientists will learn practical techniques for identifying and mitigating age bias in AI-driven hiring systems. We\u2019ll explore fairness metrics like statistical parity, counterfactual fairness, and equalized odds, and demonstrate how tools such as Fairlearn, Aequitas, and IBM Fairness 360 can be used to monitor and improve model fairness. Through hands-on exercises, participants will walk away with the skills to evaluate and de-bias models in high-risk areas like recruitment.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "In this 90-minute workshop, machine learning engineers and data scientists will learn practical techniques for identifying and mitigating age bias in AI-driven hiring systems. We’ll explore fairness metrics like statistical parity, counterfactual fairness, and equalized odds, and demonstrate how tools such as Fairlearn, Aequitas, and IBM Fairness 360 can be used to monitor and improve model fairness. Through hands-on exercises, participants will walk away with the skills to evaluate and de-bias models in high-risk areas like recruitment.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5415, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json index 327b00be3..ca2215445 100644 --- a/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json +++ b/pydata-global-2024/videos/jon-nordby-microcontrollers-machine-learning-with-micropython-in-1-2-3-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nLearn to build powerful sensors running on low-cost microcontrollers, all in Python!\n\nDid you known that (Micro)Python can scale all the way down to microcontrollers\nthat have less than 1 MB of RAM and program memory? Such devices can cost just a few dollars, and are widely used to measure, log, analyze and react to physical phenomena. This enables a wide range of useful and fun applications - be it for a smart home, wearables, scientific measurements, consumer products or industrial solutions.\n\nIn this talk, we will demonstrate how to get started with MicroPython on a ESP32 microcontroller.\nWe will first show how to create a basic Internet-connected sensor node using simple analog/digital sensors. And then we will show how to create advanced sensors that use Digital Signal Processing and Machine Learning to analyze microphone, accelerometer or camera data\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Learn to build powerful sensors running on low-cost microcontrollers, all in Python!\n\nDid you known that (Micro)Python can scale all the way down to microcontrollers\nthat have less than 1 MB of RAM and program memory? Such devices can cost just a few dollars, and are widely used to measure, log, analyze and react to physical phenomena. This enables a wide range of useful and fun applications - be it for a smart home, wearables, scientific measurements, consumer products or industrial solutions.\n\nIn this talk, we will demonstrate how to get started with MicroPython on a ESP32 microcontroller.\nWe will first show how to create a basic Internet-connected sensor node using simple analog/digital sensors. And then we will show how to create advanced sensors that use Digital Signal Processing and Machine Learning to analyze microphone, accelerometer or camera data\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1716, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json index dcefcd510..97699eaa0 100644 --- a/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json +++ b/pydata-global-2024/videos/joseph-oladokun-bridging-the-gap-real-time-predictive-analytics-with-faustream-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nFaustream is an open-source tool I developed that bridges the gap between streaming data and real-time predictive analytics. This talk explores how Faustream leverages Python, Kafka, and Faust to handle high-velocity data streams while applying machine learning models in real-time. We'll dive into its architecture, key features, and applications, demonstrating how it can revolutionize data processing across industries.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Faustream is an open-source tool I developed that bridges the gap between streaming data and real-time predictive analytics. This talk explores how Faustream leverages Python, Kafka, and Faust to handle high-velocity data streams while applying machine learning models in real-time. We'll dive into its architecture, key features, and applications, demonstrating how it can revolutionize data processing across industries.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1063, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json b/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json index 7695805c1..db352c0c1 100644 --- a/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json +++ b/pydata-global-2024/videos/kalyan-prasad-the-hidden-costs-of-data-quality-tackling-common-data-challenges-in-ml.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nData quality is a crucial factor that significantly impacts the performance of machine learning models. However, many data scientists often overlook or underestimate the hidden costs associated with poor data quality. This talk will highlight common data challenges, and discuss their implications for model accuracy and reliability. Attendees will learn practical strategies to identify, assess, and improve data quality, ensuring their machine learning projects yield better results.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Data quality is a crucial factor that significantly impacts the performance of machine learning models. However, many data scientists often overlook or underestimate the hidden costs associated with poor data quality. This talk will highlight common data challenges, and discuss their implications for model accuracy and reliability. Attendees will learn practical strategies to identify, assess, and improve data quality, ensuring their machine learning projects yield better results.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1861, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json index 2ecb783cf..bf486f66a 100644 --- a/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json +++ b/pydata-global-2024/videos/katrina-riehl-jacob-tomlinson-gpu-development-with-python-101-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nWriting GPU code in Python is easier today than ever, and in this tutorial, we will cover how you can get started with accelerating your code.\n\nYou don't need to learn C++ and you don't need new development tools.\n\nAttendees will be expected to have a general knowledge of Python and programming concepts, but no GPU experience will be necessary. Our key takeaway for attendees will be the knowledge that they don\u2019t have to do much differently to get their code running on a GPU\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Writing GPU code in Python is easier today than ever, and in this tutorial, we will cover how you can get started with accelerating your code.\n\nYou don't need to learn C++ and you don't need new development tools.\n\nAttendees will be expected to have a general knowledge of Python and programming concepts, but no GPU experience will be necessary. Our key takeaway for attendees will be the knowledge that they don’t have to do much differently to get their code running on a GPU\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5341, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json b/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json index f9e7ad9ff..a8207c92f 100644 --- a/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json +++ b/pydata-global-2024/videos/keynote-dr-jeroen-janssens-embrace-the-unix-command-line-and-supercharge-your-pydata-workflow.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nDiscover why the Unix command line remains a powerful and relevant tool for data scientists, even in a Python-dominated landscape. This talk will demonstrate how embracing the command line and leveraging its many tools can significantly enhance your productivity, streamline data workflows, and complement your Python skills.\n\nJeroen Janssens, PhD, is a polyglot data science consultant and certified instructor. His expertise lies in visualizing data, implementing machine learning models, and building solutions using Python, R, JavaScript, and Bash. Jeroen is passionate about open source and sharing knowledge. He is the author of Data Science at the Command Line (O\u2019Reilly, 2021) and is currently writing Python Polars: The Definitive Guide (O\u2019Reilly, 2025). Every now and then he blogs at https://jeroenjanssens.com.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Discover why the Unix command line remains a powerful and relevant tool for data scientists, even in a Python-dominated landscape. This talk will demonstrate how embracing the command line and leveraging its many tools can significantly enhance your productivity, streamline data workflows, and complement your Python skills.\n\nJeroen Janssens, PhD, is a polyglot data science consultant and certified instructor. His expertise lies in visualizing data, implementing machine learning models, and building solutions using Python, R, JavaScript, and Bash. Jeroen is passionate about open source and sharing knowledge. He is the author of Data Science at the Command Line (O’Reilly, 2021) and is currently writing Python Polars: The Definitive Guide (O’Reilly, 2025). Every now and then he blogs at https://jeroenjanssens.com.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 3266, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json index 610492c74..fcf517a5e 100644 --- a/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json +++ b/pydata-global-2024/videos/keynote-peter-wang-do-python-and-data-science-matter-in-our-ai-future-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nJoin us for an exciting keynote from Peter Wang\n\nIn this talk, Peter walks through some of the most interesting learnings from the last few years of AI, as well as lessons learned over the last decade of Python's adoption for data science, in an effort to answer the question, \"What is the role of the open data science movement in the era of AI?\"\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Join us for an exciting keynote from Peter Wang\n\nIn this talk, Peter walks through some of the most interesting learnings from the last few years of AI, as well as lessons learned over the last decade of Python's adoption for data science, in an effort to answer the question, \"What is the role of the open data science movement in the era of AI?\"\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 3786, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json b/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json index b4108d48c..65901a22d 100644 --- a/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json +++ b/pydata-global-2024/videos/kiraly-risi-tveten-sktime-time-series-anomaly-detection-changepoint-detection-segmentation.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nskchange is a python compatible framework library for detecting anomalies, changepoints in time series, and segmentation.\n\nskchange is based on and extends sktime, the most widely used scikit-learn compatible framework library for learning with time series. Both packages are maintained under permissive license, easily extensible by anyone, and interoperable with the python data science stack.\n\nThis workshop gives a hands-on introduction to the new joint detection interface developed in skchange and sktime, for detecting point anomalies, changepoints, and segment anomalies, in unsupervised, semi-supervised, and supervised settings.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "skchange is a python compatible framework library for detecting anomalies, changepoints in time series, and segmentation.\n\nskchange is based on and extends sktime, the most widely used scikit-learn compatible framework library for learning with time series. Both packages are maintained under permissive license, easily extensible by anyone, and interoperable with the python data science stack.\n\nThis workshop gives a hands-on introduction to the new joint detection interface developed in skchange and sktime, for detecting point anomalies, changepoints, and segment anomalies, in unsupervised, semi-supervised, and supervised settings.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5362, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json b/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json index 99837568f..d15ebf5c6 100644 --- a/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json +++ b/pydata-global-2024/videos/koseoglu-kraev-fast-intuitive-feature-selection-via-regression-on-shapley-values.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nFeature selection is an essential process in machine learning, especially when dealing with high-dimensional datasets. It helps reduce the complexity of machine learning models, improve performance, mitigate overfitting, and decrease computation time. This talk will present a novel open source feature selection framework, shap-select.\nShap-select is noteworthy because of its simplicity - it requires only one fit of the model for which one does feature selection, and yet performs comparably to much heavier methods. It conducts a linear or logistic regression of the target on the Shapley values of the features, on the validation set, and uses the signs and significance levels of the regression coefficients to implement an efficient heuristic for feature selection in tabular regression and classification tasks.\nWe compare this to several other methods, showing that shap-select combines interpretability, computational efficiency, and performance, offering a robust solution for feature selection, especially for real-world cases where model fitting is computationally expensive.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Feature selection is an essential process in machine learning, especially when dealing with high-dimensional datasets. It helps reduce the complexity of machine learning models, improve performance, mitigate overfitting, and decrease computation time. This talk will present a novel open source feature selection framework, shap-select.\nShap-select is noteworthy because of its simplicity - it requires only one fit of the model for which one does feature selection, and yet performs comparably to much heavier methods. 
It conducts a linear or logistic regression of the target on the Shapley values of the features, on the validation set, and uses the signs and significance levels of the regression coefficients to implement an efficient heuristic for feature selection in tabular regression and classification tasks.\nWe compare this to several other methods, showing that shap-select combines interpretability, computational efficiency, and performance, offering a robust solution for feature selection, especially for real-world cases where model fitting is computationally expensive.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1625, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json b/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json index 905c7cfa7..32a076774 100644 --- a/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json +++ b/pydata-global-2024/videos/kristal-joi-wise-harnessing-machine-learning-to-improve-agricultural-resilience-in-africa.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nAs the climate changes, farmers in Africa are facing enormous challenges, from unpredictable rainfall to shifting growing seasons. In this session, I will share how we can use machine learning (ML) models, built on open-source platforms like TensorFlow and Google Earth Engine, to predict crop yields for key staples such as maize and cassava. By looking at case studies from Kenya, Ghana, and Malawi, I'll show how ML is helping farmers decide when to plant, manage resources more efficiently, and reduce climate risks. I\u2019ll also talk about practical tools\u2014like community hubs, radio broadcasts, and SMS alerts\u2014that ensure even non-literate farmers can use these insights. Expect to walk away with actionable ideas on how to implement these techniques in your own work on food security.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "As the climate changes, farmers in Africa are facing enormous challenges, from unpredictable rainfall to shifting growing seasons. In this session, I will share how we can use machine learning (ML) models, built on open-source platforms like TensorFlow and Google Earth Engine, to predict crop yields for key staples such as maize and cassava. By looking at case studies from Kenya, Ghana, and Malawi, I'll show how ML is helping farmers decide when to plant, manage resources more efficiently, and reduce climate risks. I’ll also talk about practical tools—like community hubs, radio broadcasts, and SMS alerts—that ensure even non-literate farmers can use these insights. Expect to walk away with actionable ideas on how to implement these techniques in your own work on food security.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1675, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json index 026cf3219..21aad9308 100644 --- a/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json +++ b/pydata-global-2024/videos/leonie-hodel-using-ai-to-spot-deforestation-related-cows-on-satellite-images-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis talk will uncover the power of AI in combating Amazon deforestation through an innovative cattle detection system. We present a cutting-edge approach to monitoring illegal ranching, a primary driver of deforestation, using very high-resolution satellite imagery and deep learning. We'll dive into the unique challenges of detecting cattle from space \u2013 from congested scenes with small, clustered targets to diverse and cluttered backgrounds \u2013 and how we overcame them with a two-step neural network approach. By combining classification and density estimation techniques, our model efficiently identifies potential cattle locations and estimates herd sizes across varied landscapes. Discover how this interdisciplinary project, developed in collaboration with Brazilian prosecutors, leverages data science to drive real-world impact in environmental conservation and sustainable land management. 
Join us to explore the intersection of computer vision, geospatial analysis, and environmental advocacy, and learn how AI can be a powerful tool in the fight against deforestation in the Amazon and beyond.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This talk will uncover the power of AI in combating Amazon deforestation through an innovative cattle detection system. We present a cutting-edge approach to monitoring illegal ranching, a primary driver of deforestation, using very high-resolution satellite imagery and deep learning. We'll dive into the unique challenges of detecting cattle from space – from congested scenes with small, clustered targets to diverse and cluttered backgrounds – and how we overcame them with a two-step neural network approach. By combining classification and density estimation techniques, our model efficiently identifies potential cattle locations and estimates herd sizes across varied landscapes. 
Discover how this interdisciplinary project, developed in collaboration with Brazilian prosecutors, leverages data science to drive real-world impact in environmental conservation and sustainable land management. Join us to explore the intersection of computer vision, geospatial analysis, and environmental advocacy, and learn how AI can be a powerful tool in the fight against deforestation in the Amazon and beyond.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1790, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json index a5dcadb97..f091e7140 100644 --- a/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json +++ b/pydata-global-2024/videos/liam-brannigan-build-simple-scalable-data-pipelines-with-polars-deltalake-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nData scientists in the real world have to manage messy datasets that evolve over time. 
New data must be added, old data must be removed and changes to columns must be handled gracefully. Furthermore, many real world datasets grow from a size that works on a laptop to a size that must run on a server. This talk will show that in Python we can meet all these challenges in a simple and scalable way using the delta-rs package to manage the data storage and Polars to read and write the dataset.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Data scientists in the real world have to manage messy datasets that evolve over time. New data must be added, old data must be removed and changes to columns must be handled gracefully. Furthermore, many real world datasets grow from a size that works on a laptop to a size that must run on a server. 
This talk will show that in Python we can meet all these challenges in a simple and scalable way using the delta-rs package to manage the data storage and Polars to read and write the dataset.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1710, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json b/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json index a1bb03295..caa9b4180 100644 --- a/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json +++ b/pydata-global-2024/videos/lu-qiu-allison-wang-empowering-pyspark-with-lance-format-for-multi-modal-ai-data-pipelines.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nBy unifying PySpark's robust big data processing/analyzing capability with Lance's multimodal AI data lake, data engineers and scientists can efficiently manage and analyze the diverse data types required for cutting-edge AI applications within a familiar big data framework.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "By unifying PySpark's robust big data processing/analyzing capability with Lance's multimodal AI data lake, data engineers and scientists can efficiently manage and analyze the diverse data types required for cutting-edge AI applications within a familiar big data framework.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1617, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json index c0290f011..c4d578800 100644 --- a/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json +++ b/pydata-global-2024/videos/luca-baggi-foundational-models-for-time-series-forecasting-are-we-there-yet-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nTransformers are everywhere: NLP, Computer Vision, sound generation and even protein-folding. Why not in forecasting? After all, what ChatGPT does is predicting the next word. Why this architecture isn't state-of-the-art in the time series domain?\n\nIn this talk, you will understand how Amazon Chronos and Salesforece's Moirai transformer-based forecasting models work, the datasets used to train them and how to evaluate them to see if they are a good fit for your use-case.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Transformers are everywhere: NLP, Computer Vision, sound generation and even protein-folding. Why not in forecasting? After all, what ChatGPT does is predicting the next word. Why this architecture isn't state-of-the-art in the time series domain?\n\nIn this talk, you will understand how Amazon Chronos and Salesforece's Moirai transformer-based forecasting models work, the datasets used to train them and how to evaluate them to see if they are a good fit for your use-case.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1251, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json index 85d1ebe8c..5e0dcb46a 100644 --- a/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json +++ b/pydata-global-2024/videos/maarten-breddels-python-apps-in-the-browser-made-simple-by-pycafe-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nWhat if deploying a Python app was as simple as a single click, and came at zero cost? With PyCafe, you can offer users live, interactive examples of your libraries or have them submit reproducible examples when reporting issues.\nBuilt on top of Pyodide, PyCafe runs countless web frameworks (e.g. streamlit, dash, panel, gradio) directly in the browser. By making apps easy to create, share, and edit, PyCafe opens up new workflows, including possibilities we may not have even imagined yet.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "What if deploying a Python app was as simple as a single click, and came at zero cost? With PyCafe, you can offer users live, interactive examples of your libraries or have them submit reproducible examples when reporting issues.\nBuilt on top of Pyodide, PyCafe runs countless web frameworks (e.g. streamlit, dash, panel, gradio) directly in the browser. By making apps easy to create, share, and edit, PyCafe opens up new workflows, including possibilities we may not have even imagined yet.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1653, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json b/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json index 2f4018091..2800660a4 100644 --- a/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json +++ b/pydata-global-2024/videos/maggie-wolff-measuring-the-user-experience-and-the-impact-of-effort-on-business-outcomes.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nHow do you know when a user experience isn\u2019t hitting the mark? Do you wait for it to show up in qualitative feedback? Do you have a long list of different metrics that you have to keep track of that could potentially signal a problem? When evaluating user experiences, how can you quantify if it\u2019s a good experience or not? Additionally, how do you know if your good or bad experience is impacting other areas of the business?\n\nThese are common problems for product managers and the data scientists and analysts who support them. To solve them, I propose creating an aggregate metric that represents the effort or friction experienced by your users - a User Effort Index.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "How do you know when a user experience isn’t hitting the mark? Do you wait for it to show up in qualitative feedback? Do you have a long list of different metrics that you have to keep track of that could potentially signal a problem? When evaluating user experiences, how can you quantify if it’s a good experience or not? Additionally, how do you know if your good or bad experience is impacting other areas of the business?\n\nThese are common problems for product managers and the data scientists and analysts who support them. To solve them, I propose creating an aggregate metric that represents the effort or friction experienced by your users - a User Effort Index.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1748, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json index 8d1431737..17caf4bc2 100644 --- a/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json +++ b/pydata-global-2024/videos/maniyam-nielsen-preparing-data-for-llm-applications-using-data-prep-kit-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nData Prep Kit (https://github.com/IBM/data-prep-kit) is a new open source python library to help you wrangle and clean your data for generative AI applications (de-dupe, detect language, removing PII, detect malware, creating embeddings, etc.)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Data Prep Kit (https://github.com/IBM/data-prep-kit) is a new open source python library to help you wrangle and clean your data for generative AI applications (de-dupe, detect language, removing PII, detect malware, creating embeddings, etc.)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 4647, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json b/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json index e02e73b70..05bb39967 100644 --- a/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json +++ b/pydata-global-2024/videos/mark-moyou-phd-understanding-the-end-to-end-llm-training-and-inference-pipeline.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nHave you ever wanted to understand LLM internals such as pre-training, supervised fine-tuning, instruction-tuning, reinforcement learning with human feedback, parameter efficient fine-tuning, expanding LLM context lengths, attention mechanism variants, model deployment performance, and 
cost optimization, which GPUs to use when and more? This talk will take an end-to-end review of the LLM training and deployment pipeline to give you both a stronger intuition and a faster path to implementation using model training and deployment frameworks.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Have you ever wanted to understand LLM internals such as pre-training, supervised fine-tuning, instruction-tuning, reinforcement learning with human feedback, parameter efficient fine-tuning, expanding LLM context lengths, attention mechanism variants, model deployment performance, and cost optimization, which GPUs to use when and more? This talk will take an end-to-end review of the LLM training and deployment pipeline to give you both a stronger intuition and a faster path to implementation using model training and deployment frameworks.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1773, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json index 97e710905..313258e47 100644 --- a/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json +++ b/pydata-global-2024/videos/martin-durant-akimbo-vectorized-processing-of-nested-ragged-dataframe-columns-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nWe present \u201cakimbo\u201d, a library bringing a numpy-like API and vector-speed processing to dataframes on the CPU or GPU. When your data is more complex than simple one-dimensional columns, this is the most natural way to perform selection, mapping and aggregations without iterating over python objects, saving a large factor in memory and processing time.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "We present “akimbo”, a library bringing a numpy-like API and vector-speed processing to dataframes on the CPU or GPU. When your data is more complex than simple one-dimensional columns, this is the most natural way to perform selection, mapping and aggregations without iterating over python objects, saving a large factor in memory and processing time.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1787, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json index 341a206d7..d6ab1e8b4 100644 --- a/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json +++ b/pydata-global-2024/videos/marysia-winkels-the-data-that-shapes-foundational-llms-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\n\"What training data do you need, don't you just train on the whole internet?\"\n\"Doesn't data production rely heavily on outsourcing to cheap labour markets in the Global South?\"\n\"Isn't all training data just synthetic nonsense generated by LLMs nowadays, how can you expect a model to learn anything worthwhile?\"\n\nThese are all questions that I regularly get, when I tell people I work on building foundational LLMs. Because as often as we use LLMs in our daily lives nowadays, people generally know very little of the data that went into the LLM to train it.\n\nIn this talk, I'll address these questions and hope to build an understanding of what it takes to build an LLM from scratch, from a data perspective.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "\"What training data do you need, don't you just train on the whole internet?\"\n\"Doesn't data production rely heavily on outsourcing to cheap labour markets in the Global South?\"\n\"Isn't all training data just synthetic nonsense generated by LLMs nowadays, how can you expect a model to learn anything worthwhile?\"\n\nThese are all questions that I regularly get, when I tell people I work on building foundational LLMs. Because as often as we use LLMs in our daily lives nowadays, people generally know very little of the data that went into the LLM to train it.\n\nIn this talk, I'll address these questions and hope to build an understanding of what it takes to build an LLM from scratch, from a data perspective.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1670, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json index d6f8a3ebd..b3824219c 100644 --- a/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json +++ b/pydata-global-2024/videos/matthew-powers-new-features-in-apache-spark-4-0-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThe upcoming release of Apache Spark 4.0 delivers substantial enhancements that refine the functionality and augment the developer experience with the Spark unified analytics engine.\n\nAttendees will learn how to use Apache Spark 4.0's advancements for optimized data processing and analytics\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "The upcoming release of Apache Spark 4.0 delivers substantial enhancements that refine the functionality and augment the developer experience with the Spark unified analytics engine.\n\nAttendees will learn how to use Apache Spark 4.0's advancements for optimized data processing and analytics\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1735, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json index f9430ae9e..0761999f1 100644 --- a/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json +++ b/pydata-global-2024/videos/michael-sarahan-going-plaid-striving-for-speed-of-light-in-ci-pipelines-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\n\u201cI like waiting for my build jobs,\u201d said no one ever. CI is an essential part of ensuring quality, helping to highlight new issues before they might be merged into the main codebase. 
CI gives us confidence that the code changes being proposed don\u2019t break things, as least as far as our tests cover. That confidence comes at the cost of time and compute resources.\n\nThe RAPIDS team at NVIDIA manages its own operations and compute resources. Those resources are limited, of course, so we wait our turn and put the toys back when we\u2019re done.. It is essential to us that we are using our resources as efficiently as possible. This is the \u201cSpeed of Light\u201d principle at NVIDIA: how close are you to a theoretical optimal limit? For CI, this involves several factors: startup wait time, docker image setup time, cache utilization, build tool processes, and limiting unnecessary redoing builds and tests for things that haven\u2019t changed. The RAPIDS team set out to add telemetry to all of our builds, so that we can quantify where we are spending our time and compute resources, and ensure that we are spending them wisely. We\u2019ll demonstrate the telemetry tools that we\u2019re using, and show how you can add them to your build jobs.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "“I like waiting for my build jobs,” said no one ever. CI is an essential part of ensuring quality, helping to highlight new issues before they might be merged into the main codebase. CI gives us confidence that the code changes being proposed don’t break things, as least as far as our tests cover. That confidence comes at the cost of time and compute resources.\n\nThe RAPIDS team at NVIDIA manages its own operations and compute resources. Those resources are limited, of course, so we wait our turn and put the toys back when we’re done.. It is essential to us that we are using our resources as efficiently as possible. This is the “Speed of Light” principle at NVIDIA: how close are you to a theoretical optimal limit? For CI, this involves several factors: startup wait time, docker image setup time, cache utilization, build tool processes, and limiting unnecessary redoing builds and tests for things that haven’t changed. The RAPIDS team set out to add telemetry to all of our builds, so that we can quantify where we are spending our time and compute resources, and ensure that we are spending them wisely. We’ll demonstrate the telemetry tools that we’re using, and show how you can add them to your build jobs.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1784, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json b/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json index 974a1b71e..d9549d2f8 100644 --- a/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json +++ b/pydata-global-2024/videos/nathan-colbert-from-inference-to-features-build-a-core-ml-platform-from-scratch.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis hands-on tutorial guides participants through the process of constructing the essential components of a Machine Learning Platform (MLP) from scratch. We'll focus on implementing five core elements: a feature store, model registry, orchestrator, inference engine, and basic monitoring system. The session emphasizes practical, hands-on coding using Test-Driven Development (TDD), Domain Driven Design, and hexagonal architecture principles providing attendees with a functional foundation for a robust ML infrastructure.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This hands-on tutorial guides participants through the process of constructing the essential components of a Machine Learning Platform (MLP) from scratch. We'll focus on implementing five core elements: a feature store, model registry, orchestrator, inference engine, and basic monitoring system. The session emphasizes practical, hands-on coding using Test-Driven Development (TDD), Domain Driven Design, and hexagonal architecture principles providing attendees with a functional foundation for a robust ML infrastructure.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5359, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json index b6aec2154..26f57ae82 100644 --- a/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json +++ b/pydata-global-2024/videos/nicola-rennie-practical-techniques-for-polished-visuals-with-plotnine-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nPlotnine is a Python library that implements the Grammar of Graphics, enabling users to create complex, layered plots. This talk covers techniques for customising your plots, using time series data as an example, and highlights how plotnine integrates with matplotlib, allowing you to enhance your data visualisations for better storytelling.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Plotnine is a Python library that implements the Grammar of Graphics, enabling users to create complex, layered plots. This talk covers techniques for customising your plots, using time series data as an example, and highlights how plotnine integrates with matplotlib, allowing you to enhance your data visualisations for better storytelling.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 2062, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json index a9e009bc0..1197b356c 100644 --- a/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json +++ b/pydata-global-2024/videos/nicolo-giso-image-recognition-for-safety-on-the-factory-floor-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nTenova, as an innovative engineering company, collaborates closely with its client-partners to create advanced technologies and services that optimize business operations.\n\nThis talk discusses the deployment of our image recognition system to identify and mitigate potential hazards on steel plants, specifically focusing on the detection of bulky steel pieces.\nThe system was deployed on-premise using an edge device and an IP camera, supported by Azure IoT Edge and a Flask API for image processing and prediction.\nA recent migration to a RabbitMQ-based architecture using Pika enhanced scalability and communication.\n\nThe presentation will cover technical strategies, the challenges (like offline functionality and real-time, low-latency hazard detection) and the positive impact of the system on workplace safety and operational efficiency.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Tenova, as an innovative engineering company, collaborates closely with its client-partners to create advanced technologies and services that optimize business operations.\n\nThis talk discusses the deployment of our image recognition system to identify and mitigate potential hazards on steel plants, specifically focusing on the detection of bulky steel pieces.\nThe system was deployed on-premise using an edge device and an IP camera, supported by Azure IoT Edge and a Flask API for image processing and prediction.\nA recent migration to a RabbitMQ-based architecture using Pika enhanced scalability and communication.\n\nThe presentation will cover technical strategies, the challenges (like offline functionality and real-time, low-latency hazard detection) and the positive impact of the system on workplace safety and operational efficiency.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1441, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json index 999941ed1..59f8a5fc5 100644 --- a/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json +++ b/pydata-global-2024/videos/nompumelelo-mtsweni-3d-geospatial-data-visualization-using-python-and-cesiumjs-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nGeospatial data is more important than ever for tackling real-world challenges like urban planning and climate change. This tutorial teaches you how to use tools like CesiumJS and Python to turn raw data into interactive 3D visuals. It\u2019s a hands-on way to bring data to life and try to make an impact.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Geospatial data is more important than ever for tackling real-world challenges like urban planning and climate change. This tutorial teaches you how to use tools like CesiumJS and Python to turn raw data into interactive 3D visuals. It’s a hands-on way to bring data to life and try to make an impact.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5389, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json b/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json index c2081f5ca..196830405 100644 --- a/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json +++ b/pydata-global-2024/videos/noor-aftab-the-missing-78-how-women-in-ai-data-can-complete-the-future-of-innovation.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis talk focuses on the underrepresentation of women in AI and data science, where only 22% of AI professionals are women. We will explore how addressing the missing 78% is critical to creating inclusive, innovative solutions that benefit society as a whole. Attendees will learn about the current challenges women face, the importance of diverse perspectives in AI development, and actionable strategies for empowering women in the field through community engagement, mentorship, and data-driven policies.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This talk focuses on the underrepresentation of women in AI and data science, where only 22% of AI professionals are women. We will explore how addressing the missing 78% is critical to creating inclusive, innovative solutions that benefit society as a whole. Attendees will learn about the current challenges women face, the importance of diverse perspectives in AI development, and actionable strategies for empowering women in the field through community engagement, mentorship, and data-driven policies.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1789, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json index f4cefd567..d27b647b4 100644 --- a/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json +++ b/pydata-global-2024/videos/paco-nathan-catching-bad-guys-using-open-data-and-open-models-for-graphs-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nGraphRAG is a popular way to use KGs to ground AI apps. Most GraphRAG tutorials use LLMs to build graph automatically from unstructured data. However, what if you're working on use cases such as investigative journalism and sanctions compliance -- \"catching bad guys\" -- where transparency for decisions and evidence are required?\n\nThis talk explores how to leverage open data, open models, and open source to build investigative graphs which are accountable, exploring otherwise hidden relations in the data that indicate fraud or corruption. This illustrates techniques used in production use cases for anti-money laundering (AML), ultimate beneficial owner (UBO), rapid movement of funds (RMF), and other areas of sanctions compliance in general.\n\nThis approach uses Python open source libraries, e.g., the K\u00f9zuDB graph database and LanceDB vector database. 
For each NLP task we use state-of-the-art open models (mostly not LLMs) emphasizing how to tune for a domain context: named entity recognition, relation extraction, textgraph, entity linking, as well as entity resolution to merge structured data and produce a semantic overlay that organizes the graph.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "GraphRAG is a popular way to use KGs to ground AI apps. Most GraphRAG tutorials use LLMs to build graph automatically from unstructured data. However, what if you're working on use cases such as investigative journalism and sanctions compliance -- \"catching bad guys\" -- where transparency for decisions and evidence are required?\n\nThis talk explores how to leverage open data, open models, and open source to build investigative graphs which are accountable, exploring otherwise hidden relations in the data that indicate fraud or corruption. 
This illustrates techniques used in production use cases for anti-money laundering (AML), ultimate beneficial owner (UBO), rapid movement of funds (RMF), and other areas of sanctions compliance in general.\n\nThis approach uses Python open source libraries, e.g., the KùzuDB graph database and LanceDB vector database. For each NLP task we use state-of-the-art open models (mostly not LLMs) emphasizing how to tune for a domain context: named entity recognition, relation extraction, textgraph, entity linking, as well as entity resolution to merge structured data and produce a semantic overlay that organizes the graph.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1750, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json index 284faea67..bfc6efd17 100644 --- a/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json +++ b/pydata-global-2024/videos/pascal-tomecek-leveraging-csp-for-live-inference-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nCSP is a newly open-sourced library for stream processing in Python. In this talk, we discuss how CSP can be leveraged to handle all stages of an online machine learning pipeline from feature generation to live training and inference\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? 
Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "CSP is a newly open-sourced library for stream processing in Python. In this talk, we discuss how CSP can be leveraged to handle all stages of an online machine learning pipeline from feature generation to live training and inference\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1721, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json index 92fcb8468..6a2e27f88 100644 --- a/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json +++ b/pydata-global-2024/videos/patrick-deziel-putting-the-data-science-back-into-llm-evaluation-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nMany organizations are eager to build and deploy their own large language models (LLMs), but validating them can feel frustrating and incomplete. Fortunately, as data scientists we are experts in model diagnostics, and we can extend these same principles to LLM validation. 
In this talk, I will present a scientific approach to evaluating custom text generation models in Python across several dimensions such as safety, coherence, and correctness.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Many organizations are eager to build and deploy their own large language models (LLMs), but validating them can feel frustrating and incomplete. Fortunately, as data scientists we are experts in model diagnostics, and we can extend these same principles to LLM validation. In this talk, I will present a scientific approach to evaluating custom text generation models in Python across several dimensions such as safety, coherence, and correctness.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1763, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json b/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json index a4c6b4931..8f0059130 100644 --- a/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json +++ b/pydata-global-2024/videos/prashanth-rao-graph-rag-bringing-together-graph-and-vector-search-to-empower-retrieval.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis talk will go over an application scenario that brings together the benefits of vector search with graph traversal. Knowledge graphs (or more generally, graphs), have long been used to model structured data that capture the connection between entities in the real world. Recently, there has been a lot of interest in the topic of Graph RAG, which aims to use graphs as part of the retrieval process in RAG, to enhance the outcomes. The talk will cover a practical example to showcase how Python developers can leverage the PyData ecosystem alongside two open source, embedded databases: K\u00f9zu for the graph component, and LanceDB for the vector component of the retrieval.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This talk will go over an application scenario that brings together the benefits of vector search with graph traversal. Knowledge graphs (or more generally, graphs), have long been used to model structured data that capture the connection between entities in the real world. Recently, there has been a lot of interest in the topic of Graph RAG, which aims to use graphs as part of the retrieval process in RAG, to enhance the outcomes. The talk will cover a practical example to showcase how Python developers can leverage the PyData ecosystem alongside two open source, embedded databases: Kùzu for the graph component, and LanceDB for the vector component of the retrieval.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1787, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json index 84809afae..ff3661e94 100644 --- a/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json +++ b/pydata-global-2024/videos/quan-nguyen-cost-effective-data-annotation-with-bayesian-experimental-design-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nUnlike stylized machine learning examples in textbooks and lectures, data are often not readily available to be used to train models and gain insight in real-world applications; instead, practitioners are required to collect those data themselves.\nHowever, data annotation can be expensive (in terms of time, money, or some safety-critical conditions), thus limiting the amount of data we can possibly obtain.\n(Examples include eliciting an online shopper's preference with ads at the risk of being intrusive, or conducting an expensive survey to understand the market of a given product.)\nFurther, not all data are created equal: some are more informative than others.\nFor example, a data point that is similar to one already in our training set is unlikely to give us new information; conversely, a 
point that is different from the data we have thus far could yield novel insight.\nThese considerations motivate a way for us to identify the most informative data points to label and gain knowledge in a way that makes use of our labeling budget as effectively as possible.\nBayesian experimental design (BED) formalizes this framework, leveraging the tools from Bayesian statistics and machine learning to answer the question: which data point is the most valuable that should be labeled to improve our knowledge?\n\nThis talk serves as a friendly introduction to BED including its motivation as discussed above, how it works, and how to implement it in Python.\nDuring our discussions, we will show that interestingly, binary search, a popular algorithm in computer science, is a special case of BED.\nData scientists and ML practitioners who are interested in decision-making under uncertainty and probabilistic ML will benefit from this talk.\nWhile most background knowledge necessary to follow the talk will be covered, the audience should be familiar with common concepts in ML such as training data, predictive models, and common probability distributions (normal, uniform, etc.)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Unlike stylized machine learning examples in textbooks and lectures, data are often not readily available to be used to train models and gain insight in real-world applications; instead, practitioners are required to collect those data themselves.\nHowever, data annotation can be expensive (in terms of time, money, or some safety-critical conditions), thus limiting the amount of data we can possibly obtain.\n(Examples include eliciting an online shopper's preference with ads at the risk of being intrusive, or conducting an expensive survey to understand the market of a given product.)\nFurther, not all data are created equal: some are more informative than others.\nFor example, a data point that is similar to one already in our training set is unlikely to give us new information; conversely, a point that is different from the data we have thus far could yield novel insight.\nThese considerations motivate a way for us to identify the most informative data points to label and gain knowledge in a way that makes use of our labeling budget as effectively as possible.\nBayesian experimental design (BED) formalizes this framework, leveraging the tools from Bayesian statistics and machine learning to answer the question: which data point is the most valuable that should be labeled to improve our knowledge?\n\nThis talk serves as a friendly introduction to BED including its motivation as discussed above, how it works, and how to implement it in Python.\nDuring our discussions, we will show that interestingly, binary search, a popular algorithm in computer science, is a special case of 
BED.\nData scientists and ML practitioners who are interested in decision-making under uncertainty and probabilistic ML will benefit from this talk.\nWhile most background knowledge necessary to follow the talk will be covered, the audience should be familiar with common concepts in ML such as training data, predictive models, and common probability distributions (normal, uniform, etc.)\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1878, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json b/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json index 23d39c1bf..b632014d3 100644 --- a/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json +++ b/pydata-global-2024/videos/robin-linacre-rapid-deduplication-and-fuzzy-matching-of-large-datasets-using-splink.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nData deduplication is a ubiquitous data quality problem that most data people will encounter at some point in their career. 
It happens whenever multiple records are collected about the same person or other entity without a unique identifier that ties these records together.\n\nThis talk provides beginners with everything they need to start linking and deduping large datasets using Splink (https://github.com/moj-analytical-services/splink), a free Python library.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Data deduplication is a ubiquitous data quality problem that most data people will encounter at some point in their career. It happens whenever multiple records are collected about the same person or other entity without a unique identifier that ties these records together.\n\nThis talk provides beginners with everything they need to start linking and deduping large datasets using Splink (https://github.com/moj-analytical-services/splink), a free Python library.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1644, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json index 17f1d2f7f..63928ed4f 100644 --- a/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json +++ b/pydata-global-2024/videos/rodrigo-girao-serrao-understanding-polars-data-types-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nPolars (https://github.com/pola-rs/polars) boasts 18 different data types, not including variants of numerical types.\n\nDo we really need such a vast collection of data types?\n\nWhat is the use case for each type?\n\nWhat is the difference between List and Array? Or between Categorical and Enum? And why on Earth would I ever need a Struct?\n\nThis talk will clear up all of these questions and more, as we go through the data types that Polars provides and understand why we need each one of them.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Polars (https://github.com/pola-rs/polars) boasts 18 different data types, not including variants of numerical types.\n\nDo we really need such a vast collection of data types?\n\nWhat is the use case for each type?\n\nWhat is the difference between List and Array? Or between Categorical and Enum? And why on Earth would I ever need a Struct?\n\nThis talk will clear up all of these questions and more, as we go through the data types that Polars provides and understand why we need each one of them.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1829, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json index 2ba75d946..85fcc4c5f 100644 --- a/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json +++ b/pydata-global-2024/videos/ryan-varley-let-s-get-you-started-with-asynchronous-programming-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nAsynchronous programming can be intimidating for many due to its unique syntax, paradigm, and different behavior in environments like IPython and Jupyter notebooks.\n\nBut it\u2019s not that complicated\u2014and I'll prove it. In this talk, I will demystify the basics, along with some advanced concepts, from a practical perspective. By the end, you'll be ready to get started and implement significant performance improvements in your network or I/O-bound code.\n\nAttend this talk if you\u2019ve been intimidated by async and await for a while and are ready to change that.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Asynchronous programming can be intimidating for many due to its unique syntax, paradigm, and different behavior in environments like IPython and Jupyter notebooks.\n\nBut it’s not that complicated—and I'll prove it. In this talk, I will demystify the basics, along with some advanced concepts, from a practical perspective. By the end, you'll be ready to get started and implement significant performance improvements in your network or I/O-bound code.\n\nAttend this talk if you’ve been intimidated by async and await for a while and are ready to change that.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1808, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json index 2a6258fcd..e9d56fd82 100644 --- a/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json +++ b/pydata-global-2024/videos/sara-zanzottera-building-llm-voice-bots-with-open-source-tools-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nLarge Language Models are great at writing and chatting, but are they also able to talk like a human? Today, modern LLM-based voice bots can listen to users, talk back to them with a realistic voice, handle interruptions and improvise, while sticking to the goal they're given by their builders. And this is not only true for the latest, eye-watering expensive OpenAI's models! In this session we will learn how modern voice bots are made, which open source tools are available to build them, and we are going to see in practice how to build one. At the end of the session, the demo's full source code will be shared with the audience.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Large Language Models are great at writing and chatting, but are they also able to talk like a human? Today, modern LLM-based voice bots can listen to users, talk back to them with a realistic voice, handle interruptions and improvise, while sticking to the goal they're given by their builders. And this is not only true for the latest, eye-watering expensive OpenAI's models! In this session we will learn how modern voice bots are made, which open source tools are available to build them, and we are going to see in practice how to build one. At the end of the session, the demo's full source code will be shared with the audience.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1925, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json b/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json index 7b72b231c..f7a15d0f1 100644 --- a/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json +++ b/pydata-global-2024/videos/saranjeet-kaur-bhogal-empowering-new-contributors-the-evolving-role-of-the-r-development-guide.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThe R Development Guide (R Dev Guide) (https://contributor.r-project.org/rdevguide/) serves as a resource for onboarding new contributors to the R project. Initially drafted in 2021 and then expanded during the Google Season of Docs 2022, the guide has evolved to make contributing more accessible, especially for newcomers. This talk will explore the latest developments in the guide, its impact on the R community, and how it fosters inclusivity within the project by simplifying the contribution process.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "The R Development Guide (R Dev Guide) (https://contributor.r-project.org/rdevguide/) serves as a resource for onboarding new contributors to the R project. Initially drafted in 2021 and then expanded during the Google Season of Docs 2022, the guide has evolved to make contributing more accessible, especially for newcomers. This talk will explore the latest developments in the guide, its impact on the R community, and how it fosters inclusivity within the project by simplifying the contribution process.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1192, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json b/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json index 1ff436e36..f712d3eeb 100644 --- a/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json +++ b/pydata-global-2024/videos/saurabh-garg-navigating-cloud-expenses-in-data-ai-strategies-for-scientists-and-engineers.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nData rules the world and data-scientists / MLEs across academia and industry are creating new and innovative ways to glean insights which have changed our lives through easy to understand and intuitive interfaces. At the heart of the AI / ML revolution ( genAI, LLMs, bioinformatics, climate science etc ) is the availability and elasticity of state of the art hardware which enables processing large swaths of data ( TBs ) that could not run on local laptops for want of compute/memory. Cloud providers have commoditized these powerful machines to the extent that they are now available to every person with a few clicks.\n\nCloud computing allows us to tradeoff upfront hardware costs for granular operational expenses such as renting GPUs by the second. Prima facie this might seem like a winning formula, a key downside is that these costs often add up uncontrollably. 
Attributing the usage of such hardware to Data/AI/ML jobs across dimensions like cloud accounts, instances, workloads down to the lowest level of granularity, can help provide transparency to not only cost albeit resource management as well.\n\nThrough our work with open-source Metaflow, which started at Netflix in 2017, we have had an opportunity to help customers place their cloud spend in the context of value produced by individual projects combined with more granular resource management to limit spend.\n\nIn this talk, we will provide an overview of the lessons we have learnt in our quest to get a better handle on costs by using Metaflow. We will share best practices to consider when writing AI/ML workloads and how constructs in the Metaflow framework can be used to answer questions Data-Scientists/MLE\u2019s ask themselves such as:\n\nHow do my cloud costs break down over time and what workloads/cloud instances are driving these costs?\nAre the workloads executing tuned to allow maximum usage of these expensive resources?\nHow can I refactor my workloads such that the expensive resources are used to their optimal capacity?\nIn particular, we'll focus on best practices to follow when working with large datasets in a distributed multi cloud / cluster environments, and how Metaflow constructs can help achieve that in a human friendly manner, with very few lines of code.\n\nThe audience will be empowered to build and deploy production-grade Data/AI/ML pipelines while learning strategies on how to optimize workloads to keep expensive ML/AI operations under control. Finally, the audience will have the tools to answer questions like \u201cAm I using my resources to their fullest extent? If not, what are the opportunities for tuning my AI/ML jobs resource requirements, to bin pack hardware and subsequently reduces overall costs\u201d\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Data rules the world and data-scientists / MLEs across academia and industry are creating new and innovative ways to glean insights which have changed our lives through easy to understand and intuitive interfaces. At the heart of the AI / ML revolution ( genAI, LLMs, bioinformatics, climate science etc ) is the availability and elasticity of state of the art hardware which enables processing large swaths of data ( TBs ) that could not run on local laptops for want of compute/memory. Cloud providers have commoditized these powerful machines to the extent that they are now available to every person with a few clicks.\n\nCloud computing allows us to tradeoff upfront hardware costs for granular operational expenses such as renting GPUs by the second. Prima facie this might seem like a winning formula, a key downside is that these costs often add up uncontrollably. 
Attributing the usage of such hardware to Data/AI/ML jobs across dimensions like cloud accounts, instances, workloads down to the lowest level of granularity, can help provide transparency to not only cost albeit resource management as well.\n\nThrough our work with open-source Metaflow, which started at Netflix in 2017, we have had an opportunity to help customers place their cloud spend in the context of value produced by individual projects combined with more granular resource management to limit spend.\n\nIn this talk, we will provide an overview of the lessons we have learnt in our quest to get a better handle on costs by using Metaflow. We will share best practices to consider when writing AI/ML workloads and how constructs in the Metaflow framework can be used to answer questions Data-Scientists/MLE’s ask themselves such as:\n\nHow do my cloud costs break down over time and what workloads/cloud instances are driving these costs?\nAre the workloads executing tuned to allow maximum usage of these expensive resources?\nHow can I refactor my workloads such that the expensive resources are used to their optimal capacity?\nIn particular, we'll focus on best practices to follow when working with large datasets in a distributed multi cloud / cluster environments, and how Metaflow constructs can help achieve that in a human friendly manner, with very few lines of code.\n\nThe audience will be empowered to build and deploy production-grade Data/AI/ML pipelines while learning strategies on how to optimize workloads to keep expensive ML/AI operations under control. Finally, the audience will have the tools to answer questions like “Am I using my resources to their fullest extent? If not, what are the opportunities for tuning my AI/ML jobs resource requirements, to bin pack hardware and subsequently reduces overall costs”\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1813, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json index 345bce3c1..18cc748a0 100644 --- a/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json +++ b/pydata-global-2024/videos/sayantika-banik-the-lego-approach-to-designing-pydata-workflows-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nWhat if designing data workflows felt like snapping together LEGO blocks? In this talk, we\u2019ll explore how open-source tools enable flexible, modular PyData workflows. We\u2019ll discuss why open source is essential for avoiding vendor lock-in and how to integrate libraries and frameworks within the Python ecosystem, alongside tools like GitHub Actions. Plus, I\u2019ll introduce DataJourney, an open-source toolkit I developed that makes designing workflows as fun and creative as building with LEGO.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "What if designing data workflows felt like snapping together LEGO blocks? In this talk, we’ll explore how open-source tools enable flexible, modular PyData workflows. We’ll discuss why open source is essential for avoiding vendor lock-in and how to integrate libraries and frameworks within the Python ecosystem, alongside tools like GitHub Actions. Plus, I’ll introduce DataJourney, an open-source toolkit I developed that makes designing workflows as fun and creative as building with LEGO.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1829, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json index 3f0e639d8..5209bc8b1 100644 --- a/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json +++ b/pydata-global-2024/videos/sergey-maydanov-bringing-nvidia-math-libraries-to-python-scientific-community-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThe nvmath-python is a new way of delivering NVIDIA accelerated Math Libraries to Python users: researchers-practitioners, library and framework developers, and optimized GPU kernel developers. In this talk we will provide an introduction to the library design goals, its architecture, overview of the key features along with its usage examples.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "The nvmath-python is a new way of delivering NVIDIA accelerated Math Libraries to Python users: researchers-practitioners, library and framework developers, and optimized GPU kernel developers. In this talk we will provide an introduction to the library design goals, its architecture, overview of the key features along with its usage examples.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1646, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json index 71db684be..64d768bef 100644 --- a/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json +++ b/pydata-global-2024/videos/shivay-lamba-streamlining-ai-development-and-deployment-with-kitops-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nAs organizations increasingly integrate and adopt AI and machine learning internally, the challenge of maintaining separate pipelines for ML-powered systems and conventional software makes it difficult for DevOps teams to maintain these separate pipelines. This talk explores a unified approach to DevOps and MLOps, demonstrating how existing DevOps pipelines can be transformed into efficient MLOps pipelines using ModelKits with KitOps\n\nWe'll begin by examining the reasons behind the traditional separation of DevOps and MLOps pipelines, including differences in project nature, required expertise, and the size and complexity of artifacts. We'll then delve into the challenges posed by separate pipelines, such as increased costs, coordination difficulties, and accumulating technical debt. Thus the attendees will learn how to leverage open source tooling like KitOps to create a unified pipeline that accommodates both traditional software and ML-powered projects, ultimately leading to more efficient and cost-effective operations.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "As organizations increasingly integrate and adopt AI and machine learning internally, the challenge of maintaining separate pipelines for ML-powered systems and conventional software makes it difficult for DevOps teams to maintain these separate pipelines. This talk explores a unified approach to DevOps and MLOps, demonstrating how existing DevOps pipelines can be transformed into efficient MLOps pipelines using ModelKits with KitOps\n\nWe'll begin by examining the reasons behind the traditional separation of DevOps and MLOps pipelines, including differences in project nature, required expertise, and the size and complexity of artifacts. We'll then delve into the challenges posed by separate pipelines, such as increased costs, coordination difficulties, and accumulating technical debt. 
Thus the attendees will learn how to leverage open source tooling like KitOps to create a unified pipeline that accommodates both traditional software and ML-powered projects, ultimately leading to more efficient and cost-effective operations.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1822, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json index 9672b7013..29529e268 100644 --- a/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json +++ b/pydata-global-2024/videos/shreya-khurana-realtime-time-series-anomaly-detection-in-production-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nAnomaly detection is hardly a new problem, nor is the progress in it as rapid as the LLM blast we\u2019re witnessing today. 
But it is pressing.\n\nIn this talk, we\u2019ll talk about a realtime anomaly detection pipeline on time series data and discuss the nitty-gritties of the algorithm knobs that help us build an unbiased and reliable system, which includes 1) using NeuralProphet, an open source framework, to forecast for time series data and 2) using robust techniques to detect true anomalies using forecasting errors.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Anomaly detection is hardly a new problem, nor is the progress in it as rapid as the LLM blast we’re witnessing today. 
But it is pressing.\n\nIn this talk, we’ll talk about a realtime anomaly detection pipeline on time series data and discuss the nitty-gritties of the algorithm knobs that help us build an unbiased and reliable system, which includes 1) using NeuralProphet, an open source framework, to forecast for time series data and 2) using robust techniques to detect true anomalies using forecasting errors.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1807, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json b/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json index 7abd8a791..203c998a3 100644 --- a/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json +++ b/pydata-global-2024/videos/shrikanth-singh-automating-sea-retargeting-for-smarter-audience-engagement-and-higher-conversions.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThe paid search landscape is undergoing a remarkable transformation, evolving from traditional keyword-centric strategies to a more nuanced approach that prioritizes audience targeting. This shift is not just a trend; it\u2019s a response to the ever-increasing demand for precision and effectiveness in reaching potential customers in a crowded digital marketplace.\n\nAt the forefront of this evolution is our innovative automated system designed to identify high-intent users through sophisticated batch processing of their website behaviour. By harnessing the power of machine learning, we create a dynamic layer that curates smarter audiences those that closely resemble our most valuable converted customers. This enables us to execute precise retargeting campaigns that not only drive meaningful engagement but also optimize marketing budgets, resulting in enhanced audience selection and significantly higher conversion rates.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "The paid search landscape is undergoing a remarkable transformation, evolving from traditional keyword-centric strategies to a more nuanced approach that prioritizes audience targeting. This shift is not just a trend; it’s a response to the ever-increasing demand for precision and effectiveness in reaching potential customers in a crowded digital marketplace.\n\nAt the forefront of this evolution is our innovative automated system designed to identify high-intent users through sophisticated batch processing of their website behaviour. By harnessing the power of machine learning, we create a dynamic layer that curates smarter audiences those that closely resemble our most valuable converted customers. This enables us to execute precise retargeting campaigns that not only drive meaningful engagement but also optimize marketing budgets, resulting in enhanced audience selection and significantly higher conversion rates.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1685, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json index 86fb183ff..f6f9ebb69 100644 --- a/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json +++ b/pydata-global-2024/videos/son-the-nguyen-improve-llms-alignment-with-complete-and-robust-preference-data-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nThis talk explores how to align large language models (LLMs) with human values via preference learning (PL) in the presence of challenges such as incomplete and corrupted data in preference datasets. We propose a novel method for recalibrating values to tackle these issues, enhancing LLM resilience by improving the robustness of existing models. The session highlights real-world experiments that show how the method addresses adversarial noise and unobserved comparisons, making it essential for building more reliable, ethically aligned AI systems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. 
PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "This talk explores how to align large language models (LLMs) with human values via preference learning (PL) in the presence of challenges such as incomplete and corrupted data in preference datasets. We propose a novel method for recalibrating values to tackle these issues, enhancing LLM resilience by improving the robustness of existing models. The session highlights real-world experiments that show how the method addresses adversarial noise and unobserved comparisons, making it essential for building more reliable, ethically aligned AI systems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1921, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json index 261dad5f7..967f68495 100644 --- a/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json +++ b/pydata-global-2024/videos/timothy-spann-it-s-in-the-air-tonight-sensor-data-in-rag-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nToday we will learn how to build an application around sensor data, REST Feeds, weather data, traffic cameras and vector data. We will write a simple Python application to collect various structured, semistructured data and unstructured data, We will process, enrich, augment and vectorize this data and insert it into a Vector Database to be used for semantic hybrid search and filtering. We will then build a Jupyter notebook to analyze, query and return this data.\n\nAlong the way we will learn the basics of Vector Databases and Milvus. While building it we will see the practical reasons we choose what indexes make sense, what to vectorize, how to query multiple vectors even when one is an image and one is text. We will see why we do filtering. We will then use our vector database of Air Quality readings to feed our LLM and get proper answers to Air Quality questions. I will show you how to all the steps to build a RAG application with Milvus, LangChain, Ollama, Python and Air Quality Reports. 
Finally after demos I will answer questions, provide the source code and additional resources including articles.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Today we will learn how to build an application around sensor data, REST Feeds, weather data, traffic cameras and vector data. We will write a simple Python application to collect various structured, semistructured data and unstructured data, We will process, enrich, augment and vectorize this data and insert it into a Vector Database to be used for semantic hybrid search and filtering. We will then build a Jupyter notebook to analyze, query and return this data.\n\nAlong the way we will learn the basics of Vector Databases and Milvus. While building it we will see the practical reasons we choose what indexes make sense, what to vectorize, how to query multiple vectors even when one is an image and one is text. We will see why we do filtering. 
We will then use our vector database of Air Quality readings to feed our LLM and get proper answers to Air Quality questions. I will show you how to all the steps to build a RAG application with Milvus, LangChain, Ollama, Python and Air Quality Reports. Finally after demos I will answer questions, provide the source code and additional resources including articles.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 5385, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json b/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json index 37ab83837..bac337703 100644 --- a/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json +++ b/pydata-global-2024/videos/toby-dylan-hocking-using-and-contributing-to-the-data-table-package-for-efficient-big-data-analysis.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\ndata.table is an R package with C code that is one of the most efficient open-source in-memory data manipulation packages available today. 
First released to CRAN by Matt Dowle in 2006, it continues to grow in popularity, and now over 1500 other CRAN packages depend on data.table. This talk will start with data reading from CSV, discuss basic and advanced data manipulation topics, and finally will end with a discussion about how you can contribute to data.table.\n\nhttps://github.com/tdhock/2023-10-LatinR-data.table?tab=readme-ov-file#english\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "data.table is an R package with C code that is one of the most efficient open-source in-memory data manipulation packages available today. First released to CRAN by Matt Dowle in 2006, it continues to grow in popularity, and now over 1500 other CRAN packages depend on data.table. 
This talk will start with data reading from CSV, discuss basic and advanced data manipulation topics, and finally will end with a discussion about how you can contribute to data.table.\n\nhttps://github.com/tdhock/2023-10-LatinR-data.table?tab=readme-ov-file#english\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 4098, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json b/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json index c29aef583..507a8248f 100644 --- a/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json +++ b/pydata-global-2024/videos/tony-ojeda-generative-ai-python-unlocking-efficiency-personalization-and-insight.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nGenerative AI is revolutionizing industries by enhancing efficiency, personalization, and insight. This talk explores how a robust Python ecosystem, including Streamlit, various libraries, and APIs, is harnessed to build powerful generative AI applications. 
Attendees will gain insights into the practical implementation of these technologies and their transformative impact on business operations.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Generative AI is revolutionizing industries by enhancing efficiency, personalization, and insight. This talk explores how a robust Python ecosystem, including Streamlit, various libraries, and APIs, is harnessed to build powerful generative AI applications. Attendees will gain insights into the practical implementation of these technologies and their transformative impact on business operations.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 2159, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json index 9cf0be601..c5b0ac0e6 100644 --- a/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json +++ b/pydata-global-2024/videos/tun-shwe-moving-from-offline-to-online-machine-learning-with-river-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nLearn how to get started on your online ML journey with River, an open source Python ML library. The foundations of machine learning were built on offline batch processing techniques for model training and inference. As organisations become more dependent on real-time data, the technological trend for machine learning in production is moving towards adding an online stream processing approach. This has benefits such as lower computational requirements due to being able to incrementally learn from a stream of data points, which enables the continual upgrading of models by adapting to real-time changes in data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. 
The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Learn how to get started on your online ML journey with River, an open source Python ML library. The foundations of machine learning were built on offline batch processing techniques for model training and inference. As organisations become more dependent on real-time data, the technological trend for machine learning in production is moving towards adding an online stream processing approach. This has benefits such as lower computational requirements due to being able to incrementally learn from a stream of data points, which enables the continual upgrading of models by adapting to real-time changes in data.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1815, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json index 5e2502274..23a2dac16 100644 --- a/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json +++ b/pydata-global-2024/videos/vyoma-gajjar-llms-in-regulated-industries-challenges-and-governance-solutions-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nAs large language models (LLMs) become increasingly integrated into industries like finance, healthcare, and law, ensuring their responsible deployment is critical\u2014particularly in highly regulated environments. These industries face unique challenges, including data privacy, compliance with strict regulations, and minimizing the risks of biased or untrustworthy outputs.\n\nThis session will explore the complexities of using LLMs in regulated industries and present a governance framework to address these challenges. We'll cover practical solutions for deploying LLMs while adhering to industry-specific regulations, ensuring transparency, reducing bias, and maintaining data privacy. 
Attendees will learn how to implement governance best practices at various stages of the LLM lifecycle\u2014from model training and validation to deployment and ongoing monitoring.\n\nDrawing on real-world examples and lessons learned, this talk will equip data scientists, machine learning engineers, and AI leaders with actionable strategies for navigating regulatory compliance and minimizing risks, while still harnessing the full potential of LLMs to drive innovation.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "As large language models (LLMs) become increasingly integrated into industries like finance, healthcare, and law, ensuring their responsible deployment is critical—particularly in highly regulated environments. 
These industries face unique challenges, including data privacy, compliance with strict regulations, and minimizing the risks of biased or untrustworthy outputs.\n\nThis session will explore the complexities of using LLMs in regulated industries and present a governance framework to address these challenges. We'll cover practical solutions for deploying LLMs while adhering to industry-specific regulations, ensuring transparency, reducing bias, and maintaining data privacy. Attendees will learn how to implement governance best practices at various stages of the LLM lifecycle—from model training and validation to deployment and ongoing monitoring.\n\nDrawing on real-world examples and lessons learned, this talk will equip data scientists, machine learning engineers, and AI leaders with actionable strategies for navigating regulatory compliance and minimizing risks, while still harnessing the full potential of LLMs to drive innovation.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1455, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json index 047cd00f0..d9687d400 100644 --- a/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json +++ b/pydata-global-2024/videos/wes-mckinney-retooling-for-a-smaller-data-era-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nIn this talk, I will offer my perspective on the modern data tools landscape and in particular user-facing tools for interactive data science and data exploration. The latest trends of composable data systems and embeddable query engines like DuckDB and DataFusion create both challenges and opportunities to create a more coherent and productive stack of tools for both end user data scientists and developers building data systems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! 
See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "In this talk, I will offer my perspective on the modern data tools landscape and in particular user-facing tools for interactive data science and data exploration. The latest trends of composable data systems and embeddable query engines like DuckDB and DataFusion create both challenges and opportunities to create a more coherent and productive stack of tools for both end user data scientists and developers building data systems.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1804, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json index 6ab6e6d19..155e057c1 100644 --- a/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json +++ b/pydata-global-2024/videos/zain-hasan-colpalis-vision-powered-rag-for-enterprise-documents-pydata-global-2024.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nTraditional document processing for Retrieval-Augmented Generation (RAG) often involves cumbersome, error-prone extraction pipelines, hampering AI's ability to retrieve high-quality information from complex formats like PDFs and PowerPoint decks. ColPali disrupts this process by embedding entire pages\u2014text, visuals, and layout\u2014into rich, multi-vector representations using Vision Language Models (VLMs). This talk explores how ColPali, paired with multimodal models like the Llama 3.2 Vision series, enables RAG systems to \u201csee\u201d and reason over documents, dramatically improving retrieval performance. Attendees will learn to implement ColPali for enhanced, scalable, and robust enterprise knowledge retrieval.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. 
PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Traditional document processing for Retrieval-Augmented Generation (RAG) often involves cumbersome, error-prone extraction pipelines, hampering AI's ability to retrieve high-quality information from complex formats like PDFs and PowerPoint decks. ColPali disrupts this process by embedding entire pages—text, visuals, and layout—into rich, multi-vector representations using Vision Language Models (VLMs). This talk explores how ColPali, paired with multimodal models like the Llama 3.2 Vision series, enables RAG systems to “see” and reason over documents, dramatically improving retrieval performance. Attendees will learn to implement ColPali for enhanced, scalable, and robust enterprise knowledge retrieval.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. 
PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 2151, "language": "eng", "recorded": "2024-12-03", diff --git a/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json b/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json index 15fb4b701..984a2b2ba 100644 --- a/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json +++ b/pydata-global-2024/videos/zhen-tony-zhao-training-language-models-to-identify-urgent-messages-in-real-time.json @@ -1,5 +1,5 @@ { - "description": "www.pydata.org\n\nProviding timely maternal healthcare in developing countries is a critical challenge. This talk demonstrates how data-driven solutions can bridge healthcare gaps and improve access to vital healthcare information for pregnant women, with user privacy in mind. To do so, we fine-tuned the Gemma-2 2 billion parameter instruction model on a synthetic dataset in order to detect whether user messages pertain to urgent or non-urgent maternal healthcare issues. By quickly identifying and prioritizing user inquiries, the model can aid help desks by ensuring urgent messages are promptly forwarded to the appropriate healthcare professionals for immediate intervention.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. 
\n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.\n\n00:00 Welcome!\n00:10 Help us add time stamps or captions to this video! See the description for details.\n\nWant to help add timestamps to our YouTube videos to help with discoverability? Find out more here: https://github.com/numfocus/YouTubeVideoTimestamps", + "description": "Providing timely maternal healthcare in developing countries is a critical challenge. This talk demonstrates how data-driven solutions can bridge healthcare gaps and improve access to vital healthcare information for pregnant women, with user privacy in mind. To do so, we fine-tuned the Gemma-2 2 billion parameter instruction model on a synthetic dataset in order to detect whether user messages pertain to urgent or non-urgent maternal healthcare issues. By quickly identifying and prioritizing user inquiries, the model can aid help desks by ensuring urgent messages are promptly forwarded to the appropriate healthcare professionals for immediate intervention.\n\nPyData is an educational program of NumFOCUS, a 501(c)3 non-profit organization in the United States. PyData provides a forum for the international community of users and developers of data analysis tools to share ideas and learn from each other. The global PyData network promotes discussion of best practices, new approaches, and emerging technologies for data management, processing, analytics, and visualization. PyData communities approach data science using many languages, including (but not limited to) Python, Julia, and R. \n\nPyData conferences aim to be accessible and community-driven, with novice to advanced level presentations. PyData tutorials and talks bring attendees the latest project features along with cutting-edge use cases.", "duration": 1809, "language": "eng", "recorded": "2024-12-03",