From 6978ea5641bc8b4db80392e7cffd2923dbf011fa Mon Sep 17 00:00:00 2001 From: Tom King Date: Sat, 7 Mar 2026 15:46:38 -0800 Subject: [PATCH 1/7] feat(utmstrip): handle more tracking params, add maint infrastructure --- .github/workflows/tracking-params-check.yml | 73 +++++++++++ bookmarklets.json | 2 +- dist/utmstrip.bookmarklet | 2 +- docs/Maintenance-Plan-UtmStrip.md | 117 +++++++++++++++++ docs/utmstrip-impl-reference.md | 136 ++++++++++++++++++++ package.json | 4 +- scripts/test-utmstrip.js | 20 ++- src/utmstrip.ts | 18 ++- tests/utmstrip-test-urls.json | 105 +++++++++++++++ 9 files changed, 468 insertions(+), 9 deletions(-) create mode 100644 .github/workflows/tracking-params-check.yml create mode 100644 docs/Maintenance-Plan-UtmStrip.md create mode 100644 docs/utmstrip-impl-reference.md diff --git a/.github/workflows/tracking-params-check.yml b/.github/workflows/tracking-params-check.yml new file mode 100644 index 0000000..270e371 --- /dev/null +++ b/.github/workflows/tracking-params-check.yml @@ -0,0 +1,73 @@ +name: Tracking Parameter Monitor + +on: + schedule: + - cron: '0 0 1 * *' # 1st of each month at midnight UTC + workflow_dispatch: # allow manual trigger + +jobs: + check-params: + runs-on: ubuntu-latest + permissions: + issues: write + contents: read + + steps: + - uses: actions/checkout@v4 + + - name: Fetch DuckDuckGo tracking parameters + run: | + curl -fsSL \ + "https://raw.githubusercontent.com/duckduckgo/privacy-configuration/main/features/tracking-parameters.json" \ + -o /tmp/ddg-params.json + + - name: Extract and compare parameters + id: compare + run: | + # Extract parameter names from DuckDuckGo JSON + # The DDG JSON has parameters as an array of objects with a "parameter" key + jq -r '.settings.parameters[].parameter' /tmp/ddg-params.json \ + | sort > /tmp/ddg-list.txt + + # Extract quoted string values from utmstrip.ts (single-quoted identifiers) + # Matches 'param_name' patterns, strips quotes, excludes TypeScript types + grep -oE 
"'[a-zA-Z_][a-zA-Z0-9_]*'" src/utmstrip.ts \ + | tr -d "'" | sort -u > /tmp/utmstrip-list.txt + + # Find params in DDG list not covered in utmstrip.ts + comm -23 /tmp/ddg-list.txt /tmp/utmstrip-list.txt > /tmp/missing.txt + + if [ -s /tmp/missing.txt ]; then + echo "new_params=true" >> "$GITHUB_OUTPUT" + MISSING_PARAMS=$(cat /tmp/missing.txt) + printf 'MISSING_PARAMS<<EOF\n%s\nEOF\n' "$MISSING_PARAMS" >> "$GITHUB_ENV" + else + echo "new_params=false" >> "$GITHUB_OUTPUT" + fi + + - name: Open GitHub issue if new params found + if: steps.compare.outputs.new_params == 'true' + env: + MISSING_PARAMS: ${{ env.MISSING_PARAMS }} + uses: actions/github-script@v7 + with: + script: | + await github.rest.issues.create({ + owner: context.repo.owner, + repo: context.repo.repo, + title: 'Tracking params check: new parameters detected', + labels: ['maintenance', 'utmstrip'], + body: [ + '## Monthly Tracking Parameter Check', + '', + 'The following parameters appear in DuckDuckGo\'s tracking protection list', + 'but are not present in `src/utmstrip.ts`:', + '', + '```', + process.env.MISSING_PARAMS, + '```', + '', + 'Please review these against the quarterly maintenance checklist in', + '`docs/Maintenance-Plan-UtmStrip.md` and determine if they should be added.', + ].join('\n') + }); diff --git a/bookmarklets.json b/bookmarklets.json index 26a2f52..27354ee 100644 --- a/bookmarklets.json +++ b/bookmarklets.json @@ -88,7 +88,7 @@ { "name": "UtmStrip", "file": "utmstrip.bookmarklet", - "version": "2.3.1" + "version": "2.3.2" }, { "name": "deLighter", diff --git a/dist/utmstrip.bookmarklet b/dist/utmstrip.bookmarklet index f359c73..44b9d48 100644 --- a/dist/utmstrip.bookmarklet +++ b/dist/utmstrip.bookmarklet @@ -1 +1 @@ 
-javascript:'use%20strict'%3B(()=%3E%7Bconst%20t=location.pathname%2Ce=location.search%3Bif(3%3Ee.length%26%26!t.includes('%2Famp'))return%3Bconst%20i=location.hostname%2Cs=new%20URLSearchParams(e)%2Ca=%5B'assetType'%2C'elqTrack'%2C'mkt%5Ftok'%2C'originalReferer'%2C'referrer'%2C'terminal%5Fid'%2C'trk'%2C'trkCampaign'%2C'trkInfo'%2C'anid'%2C'assetid'%2C'campaignid'%2C'eid'%2C'gclid'%2C'recipientid'%2C'siteid'%2C'sib%5Fcuid'%2C'sib%5Fsid'%2C'%5Fbta%5Ftid'%2C'%5Fbta%5Fc'%2C'%5F%5Fs'%2C'fbclid'%2C'hrc'%2C'igsh'%2C'igshid'%2C'refsrc'%2C'%5Fgl'%2C'gclsrc'%2C'srsltid'%2C'%5Fhsenc'%2C'%5Fhsmi'%2C'%5F%5Fhsfp'%2C'%5F%5Fhssc'%2C'%5F%5Fhstc'%2C'cm%5Fmmc'%2C'cm%5Fre'%2C'cm%5Fsp'%2C'manual%5Fcm%5Fmmc'%2C'%5Fke'%2C'%5Fkx'%2C'trk%5Fcontact'%2C'trk%5Fmsg'%2C'trk%5Fmodule'%2C'trk%5Fsid'%2C'mc%5Fcid'%2C'mc%5Feid'%2C'iesrc'%2C'msclkid'%2C'dclid'%2C'twclid'%2C'ttclid'%2C'oly%5Fenc%5Fid'%2C'oly%5Fanon%5Fid'%2C'epik'%2C'vero%5Fid'%5D%2C%5F=%5B'fb%5F'%2C'action%5F'%2C'ga%5F'%2C'utm%5F'%2C'hmb%5F'%2C'hsa%5F'%2C'mtm%5F'%2C'pk%5F'%2C'oly%5F'%2C'stm%5F'%5D%3B%2F%5C.aliexpress%5C.%5Ba-z%5D%7B2%2C3%7D%24%2F.test(i)%3F(a.push('algo%5Fevid'%2C'algo%5Fpvid'%2C'btsid'%2C'spm'%2C'scm'%2C'ws%5Fab%5Ftest')%2C%5F.push('aff%5F')):%2F(%7C%5C.)amazon%5C.com%24%2F.test(i)%3Fa.push('%5Fencoding'%2C'asc%5Fcampaign'%2C'asc%5Frefurl'%2C'asc%5Fsource'%2C'ascsubtag'%2C'content-id'%2C'crid'%2C'cv%5Fct%5Fcx'%2C'dib%5Ftag'%2C'dib'%2C'ie'%2C'language'%2C'linkCode'%2C'linkId'%2C'pd%5Frd%5Fi'%2C'pd%5Frd%5Fr'%2C'pd%5Frd%5Fw'%2C'pd%5Frd%5Fwg'%2C'pf%5Frd%5Fi'%2C'pf%5Frd%5Fm'%2C'pf%5Frd%5Fp'%2C'pf%5Frd%5Fr'%2C'pf%5Frd%5Fs'%2C'pf%5Frd%5Ft'%2C'pf'%2C'psc'%2C'qid'%2C'ref%5F'%2C'sprefix'%2C'sr'%2C'tag'%2C'th'):i.endsWith('.ebay.com')%7C%7C%2F%5C.ebay%5C.co%5C.%5Ba-z%5D%7B2%7D%24%2F.test(i)%3Fa.push('mkevt'%2C'mkcid'%2C'mkrid'%2C'campid'%2C'toolid'%2C'customid'%2C'norover'%2C'itm'%2C'amdata'):%2F(%5E%7C%5C.)google%5C.(com%7C%5Ba-z%5D%7B2%7D%7Ccom%3F%5C.%5Ba-z%5D%7B2%7D)%24%2F.test(i)%26%26t.startsWith('%2Fsearch')%3Fa.push('aqs
'%2C'ei'%2C'gs%5Flp'%2C'gs%5Fssp'%2C'iflsig'%2C'sca%5Fesv'%2C'ved'%2C'oq'%2C'sa'%2C'uact'%2C'rlz'%2C'sxsrf'%2C'bih'%2C'biw'%2C'client'%2C'prmd'%2C'sclient'%2C'source'%2C'sourceid'%2C'ie'%2C'oe'):i.endsWith('.linkedin.com')%3Fa.push('li%5Ffat%5Fid'%2C'licu'%2C'lipi'%2C'midSig'%2C'midToken'%2C'refId'):i.endsWith('.target.com')%3Fa.push('afid'%2C'clkid'%2C'lnm'%2C'preselect'%2C'tref'):i.endsWith('.temu.com')%3F(a.push('%5Fbg%5Ffs'%2C'%5Fp%5Fjump%5Fid'%2C'%5Fp%5Frfs'%2C'refer%5Fpage%5Fid'%2C'refer%5Fpage%5Fname'%2C'refer%5Fpage%5Fsn')%2C%5F.push('%5Fx%5F')):i.endsWith('.tiktok.com')%7C%7C'tiktok.com'===i%3Fa.push('%5Fd'%2C'%5Fr'%2C'%5Ft'%2C'is%5Ffrom%5Fwebapp'%2C'preview%5Fpb'%2C'share%5Fapp%5Fname'%2C'share%5Fitem%5Fid'%2C'tt4d%5Ft'%2C'timestamp'%2C'u%5Fcode'%2C'user%5Fid'):%2F%5C.(twitter%7Cx)%5C.com%24%2F.test(i)%7C%7C%2F%5E(twitter%7Cx)%5C.com%24%2F.test(i)%3Fa.push('cn'%2C'ref%5Fsrc'%2C'ref%5Furl'%2C's'%2C't'):i.endsWith('.walmart.com')%3Fa.push('adsredirect'%2C'affiliates%5Fad%5Fid'%2C'athcpid'%2C'athpgid'%2C'athcgid'%2C'athmtid'%2C'athstid'%2C'athznid'%2C'athiession'%2C'athancid'%2C'athposb'%2C'athena'%2C'campaign%5Fid'%2C'wmlspartner'%2C'wt%5Fmc'):(%2F(m%7Cwww)%5C.youtube%5C.com%24%2F.test(i)%7C%7C'youtu.be'===i%7C%7C'www.youtube-nocookie.com'===i)%26%26a.push('ac'%2C'annotation%5Fid'%2C'app'%2C'feature'%2C'gclid'%2C'kw'%2C'src%5Fvid')%3Bconst%20c=new%20Set(a.map(t=%3Et.toLowerCase()))%2Cr=t=%3E%7Bconst%20e=t.toLowerCase()%3Breturn%20c.has(e)%7C%7C%5F.some(t=%3Ee.startsWith(t))%7C%7C%2F%5Ecm%5Fmmca%5Cd%2B%24%2Fi.test(t)%7D%3Bfor(const%20t%20of%5B...s.keys()%5D)r(t)%26%26s.delete(t)%3Blet%20d=s.toString()%3Bd=d%3F'%3F'%2Bd:''%3Bconst%20o=t.replace(%2F%5C%2Famp%5C%2F%3F%24%2F%2C'')%2Cn=%60%24%7Blocation.protocol%7D%2F%2F%24%7Blocation.host%7D%24%7Bo%7D%24%7Bd%7D%24%7Blocation.hash%7D%60%3Bif(confirm('Update%20history%20and%20copy%20cleaned%20URL%20to%20clipboard%3F')%26%26(navigator.clipboard.writeText(n)%2Ce!==d%7C%7Ct!==o))%7Bhistory.replaceState(null%2C''%2Cn)%
3Bconst%20t=window.open(n%2C'%5Fself'%2C'noreferrer')%3Bt%26%26(t.opener=null)%7D%7D)()%3Bvoid'2.3.1' \ No newline at end of file +javascript:'use%20strict'%3B(()=%3E%7Bconst%20t=location.pathname%2Ce=location.search%3Bif(3%3Ee.length%26%26!t.includes('%2Famp'))return%3Bconst%20i=location.hostname%2Cs=new%20URLSearchParams(e)%2C%5F=%5B'assetType'%2C'elqTrack'%2C'mkt%5Ftok'%2C'originalReferer'%2C'referrer'%2C'terminal%5Fid'%2C'trk'%2C'trkCampaign'%2C'trkInfo'%2C'anid'%2C'assetid'%2C'campaignid'%2C'eid'%2C'gclid'%2C'recipientid'%2C'siteid'%2C'sib%5Fcuid'%2C'sib%5Fsid'%2C'%5Fbta%5Ftid'%2C'%5Fbta%5Fc'%2C'%5F%5Fs'%2C'fbclid'%2C'hrc'%2C'igsh'%2C'igshid'%2C'refsrc'%2C'%5Fgl'%2C'gclsrc'%2C'srsltid'%2C'%5Fhsenc'%2C'%5Fhsmi'%2C'%5F%5Fhsfp'%2C'%5F%5Fhssc'%2C'%5F%5Fhstc'%2C'cm%5Fmmc'%2C'cm%5Fre'%2C'cm%5Fsp'%2C'manual%5Fcm%5Fmmc'%2C'%5Fke'%2C'%5Fkx'%2C'trk%5Fcontact'%2C'trk%5Fmsg'%2C'trk%5Fmodule'%2C'trk%5Fsid'%2C'mc%5Fcid'%2C'mc%5Feid'%2C'iesrc'%2C'msclkid'%2C'dclid'%2C'twclid'%2C'ttclid'%2C'oly%5Fenc%5Fid'%2C'oly%5Fanon%5Fid'%2C'epik'%2C'vero%5Fid'%2C'rdt%5Fcid'%2C'ScCid'%2C'qclid'%2C'tblci'%2C'cjevent'%2C'ef%5Fid'%2C'outbrain%5Fcid'%5D%2Ca=%5B'fb%5F'%2C'action%5F'%2C'ga%5F'%2C'utm%5F'%2C'hmb%5F'%2C'hsa%5F'%2C'mtm%5F'%2C'pk%5F'%2C'oly%5F'%2C'stm%5F'%5D%3B%2F%5C.aliexpress%5C.%5Ba-z%5D%7B2%2C3%7D%24%2F.test(i)%3F(%5F.push('algo%5Fevid'%2C'algo%5Fpvid'%2C'btsid'%2C'spm'%2C'scm'%2C'ws%5Fab%5Ftest')%2Ca.push('aff%5F')):%2F(%7C%5C.)amazon%5C.com%24%2F.test(i)%3F%5F.push('%5Fencoding'%2C'asc%5Fcampaign'%2C'asc%5Frefurl'%2C'asc%5Fsource'%2C'ascsubtag'%2C'content-id'%2C'crid'%2C'cv%5Fct%5Fcx'%2C'dib%5Ftag'%2C'dib'%2C'ie'%2C'language'%2C'linkCode'%2C'linkId'%2C'pd%5Frd%5Fi'%2C'pd%5Frd%5Fr'%2C'pd%5Frd%5Fw'%2C'pd%5Frd%5Fwg'%2C'pf%5Frd%5Fi'%2C'pf%5Frd%5Fm'%2C'pf%5Frd%5Fp'%2C'pf%5Frd%5Fr'%2C'pf%5Frd%5Fs'%2C'pf%5Frd%5Ft'%2C'pf'%2C'psc'%2C'qid'%2C'ref%5F'%2C'sprefix'%2C'sr'%2C'tag'%2C'th'):i.endsWith('.ebay.com')%7C%7C%2F%5C.ebay%5C.co%5C.%5Ba-z%5D%7B2%7D%24%2F.test(i)%3F%5F.push('mkevt'%2C'
mkcid'%2C'mkrid'%2C'campid'%2C'toolid'%2C'customid'%2C'norover'%2C'itm'%2C'amdata'):%2F(%5E%7C%5C.)google%5C.(com%7C%5Ba-z%5D%7B2%7D%7Ccom%3F%5C.%5Ba-z%5D%7B2%7D)%24%2F.test(i)%26%26t.startsWith('%2Fsearch')%3F%5F.push('aqs'%2C'ei'%2C'gs%5Flp'%2C'gs%5Fssp'%2C'iflsig'%2C'sca%5Fesv'%2C'ved'%2C'oq'%2C'sa'%2C'uact'%2C'rlz'%2C'sxsrf'%2C'bih'%2C'biw'%2C'client'%2C'prmd'%2C'sclient'%2C'source'%2C'sourceid'%2C'ie'%2C'oe'):i.endsWith('.linkedin.com')%3F%5F.push('li%5Ffat%5Fid'%2C'licu'%2C'lipi'%2C'midSig'%2C'midToken'%2C'refId'):i.endsWith('.target.com')%3F%5F.push('afid'%2C'clkid'%2C'lnm'%2C'preselect'%2C'tref'):i.endsWith('.temu.com')%3F(%5F.push('%5Fbg%5Ffs'%2C'%5Fp%5Fjump%5Fid'%2C'%5Fp%5Frfs'%2C'refer%5Fpage%5Fid'%2C'refer%5Fpage%5Fname'%2C'refer%5Fpage%5Fsn')%2Ca.push('%5Fx%5F')):i.endsWith('.tiktok.com')%7C%7C'tiktok.com'===i%3F%5F.push('%5Fd'%2C'%5Fr'%2C'%5Ft'%2C'is%5Ffrom%5Fwebapp'%2C'preview%5Fpb'%2C'share%5Fapp%5Fname'%2C'share%5Fitem%5Fid'%2C'tt4d%5Ft'%2C'timestamp'%2C'u%5Fcode'%2C'user%5Fid'):%2F%5C.(twitter%7Cx)%5C.com%24%2F.test(i)%7C%7C%2F%5E(twitter%7Cx)%5C.com%24%2F.test(i)%3F%5F.push('cn'%2C'ref%5Fsrc'%2C'ref%5Furl'%2C's'%2C't'):i.endsWith('.walmart.com')%3F%5F.push('adsredirect'%2C'affiliates%5Fad%5Fid'%2C'athcpid'%2C'athpgid'%2C'athcgid'%2C'athmtid'%2C'athstid'%2C'athznid'%2C'athiession'%2C'athancid'%2C'athposb'%2C'athena'%2C'campaign%5Fid'%2C'wmlspartner'%2C'wt%5Fmc'):(%2F(m%7Cwww)%5C.youtube%5C.com%24%2F.test(i)%7C%7C'youtu.be'===i%7C%7C'www.youtube-nocookie.com'===i)%26%26%5F.push('ac'%2C'annotation%5Fid'%2C'app'%2C'feature'%2C'gclid'%2C'kw'%2C'src%5Fvid')%3Bconst%20c=new%20Set(%5F.map(t=%3Et.toLowerCase()))%2Cr=t=%3E%7Bconst%20e=t.toLowerCase()%3Breturn%20c.has(e)%7C%7Ca.some(t=%3Ee.startsWith(t))%7C%7C%2F%5Ecm%5Fmmca%5Cd%2B%24%2Fi.test(t)%7D%3Bfor(const%20t%20of%5B...s.keys()%5D)r(t)%26%26s.delete(t)%3Blet%20d=s.toString()%3Bd=d%3F'%3F'%2Bd:''%3Bconst%20o=t.replace(%2F%5C%2Famp%5C%2F%3F%24%2F%2C'')%2Cn=%60%24%7Blocation.protocol%7D%2F%2F%24%7Blocatio
n.host%7D%24%7Bo%7D%24%7Bd%7D%24%7Blocation.hash%7D%60%3Bif(confirm('Update%20history%20and%20copy%20cleaned%20URL%20to%20clipboard%3F')%26%26(navigator.clipboard.writeText(n)%2Ce!==d%7C%7Ct!==o))%7Bhistory.replaceState(null%2C''%2Cn)%3Bconst%20t=window.open(n%2C'%5Fself'%2C'noreferrer')%3Bt%26%26(t.opener=null)%7D%7D)()%3Bvoid'2.3.2' \ No newline at end of file diff --git a/docs/Maintenance-Plan-UtmStrip.md b/docs/Maintenance-Plan-UtmStrip.md new file mode 100644 index 0000000..f349673 --- /dev/null +++ b/docs/Maintenance-Plan-UtmStrip.md @@ -0,0 +1,117 @@ +# UtmStrip Maintenance Guide + +## Overview + +The tracking parameter landscape evolves continuously: ad platforms launch new +click IDs, email platforms rename parameters, and browsers add their own +stripping that shifts expectations. This guide documents a quarterly review +process to keep `src/utmstrip.ts` current and the test infrastructure in sync. + +--- + +## Quarterly Review Checklist + +### Reference Sources to Check + +1. **DuckDuckGo** — [tracking-parameters.json](https://raw.githubusercontent.com/duckduckgo/privacy-configuration/main/features/tracking-parameters.json) +2. **ClearURLs** — (or GitHub rules file) +3. **Firefox ETP** — search for "query parameter stripping" +4. **Brave** — + +### Review Steps + +1. Fetch each reference source (links above) +2. Extract parameter names → compare against `universalExact`, `universalPrefixes`, + and host-specific arrays in `src/utmstrip.ts` +3. For each param in reference sources but NOT in `utmstrip.ts`: + research it → decide add/skip +4. For each param in `utmstrip.ts` NOT in any reference source: research if + deprecated → decide remove +5. 
Update `src/utmstrip.ts` + `scripts/test-utmstrip.js` + `tests/utmstrip-test-urls.json` + together (all three files must stay in sync) + +### Decision Criteria + +**Add a parameter if:** + +- Two or more reference sources list it, OR +- A major platform (Google, Meta, Microsoft, TikTok, LinkedIn, Amazon) has + published docs + +**Remove a parameter if:** + +- The originating platform no longer exists, OR +- No reference source has listed it for 2+ consecutive quarterly reviews, AND +- No community reports of it in the wild (check GitHub issues) + +**Keep parameters that are:** + +- Rare but real (confirmed real-world examples), even if not in reference + sources + +--- + +## After Making Changes + +1. Update `tests/utmstrip-test-urls.json` (add test cases **before** code changes) +2. Update `scripts/test-utmstrip.js` to match new `src/utmstrip.ts` arrays +3. Update `src/utmstrip.ts` with the new parameters +4. Update `/* cSpell: ignore ... */` comments in both `src/utmstrip.ts` and + `scripts/test-utmstrip.js` if new words would be flagged as misspellings +5. Run: `npm test` +6. Run: `npm run deploy` +7. File a PR; CI will validate the full build + tests + +--- + +## Quarterly Review Schedule + +| Approx. Timing | Focus | +| -------------- | --------------------------------------------------------- | +| Q1 - January | Post-holiday changes; new January GA4 params | +| Q2 - April | Spring marketing; browser spring privacy updates | +| Q3 - July | Mid-year ad platform changes; iOS/Safari summer betas | +| Q4 - October | Pre-holiday season; browser major releases; platform launches | + +Each review should take approximately 1 hour: 30 min research, 20 min code/test changes, +10 min PR. + +--- + +## Parameter Decision Log + +Track decisions here to avoid re-researching the same parameters each quarter. 
+ +### Added Parameters (with rationale) + +| Parameter | Platform | Added | Rationale | +| --------- | ------------ | ----- | --------------------------------------- | +| `rdt_cid` | Reddit Ads | 2026-Q1 | Reddit is now a top-5 ad platform; confirmed in DDG list | +| `ScCid` | Snapchat Ads | 2026-Q1 | Major mobile ad platform; confirmed in DDG list | +| `qclid` | Quora Ads | 2026-Q1 | Confirmed in official Quora Ads docs | +| `tblci` | Taboola | 2026-Q1 | Native ad click ID; appears on news/media sites widely | +| `cjevent` | CJ Affiliate | 2026-Q1 | Commission Junction; one of the largest affiliate networks | +| `ef_id` | Adobe Advertising | 2026-Q1 | Adobe Advertising Cloud redirect ID; common in enterprise | +| `outbrain_cid` | Outbrain | 2026-Q1 | Outbrain native ad click ID; appears alongside Taboola | + +### Skipped Parameters (with reasoning) + +| Parameter | Reason to Skip | +| ----------------- | ------------------------------------------------------ | +| `click_id` | Too generic — not safely stripped universally | +| `distinct_id` | Mixpanel user ID — functional, could break sites | +| `branch_match_id` | Branch.io deep linking — used for native apps, less on web | +| `c_n`, `c_p`, `c_t` | Matomo content tracking — specialized params rare in shared URLs | +| `mytarget_click_id` | VK/Mail.ru Russian platform — very regional, low impact for most users | + +--- + +## Automated Monitoring + +A GitHub Actions workflow (`.github/workflows/tracking-params-check.yml`) runs +monthly on the 1st of each month. It fetches the DuckDuckGo tracking parameters +list, compares against `src/utmstrip.ts`, and opens a GitHub issue if new +parameters are detected. + +This gives early warning before quarterly reviews so researchers aren't +surprised by accumulated drift. 
diff --git a/docs/utmstrip-impl-reference.md b/docs/utmstrip-impl-reference.md new file mode 100644 index 0000000..eadbb63 --- /dev/null +++ b/docs/utmstrip-impl-reference.md @@ -0,0 +1,136 @@ +# UTMStrip Implementation Reference + +This file contains extracted param lists for the refactored implementation. + +## Universal Params (All Hosts) + +### Exact Matches +```typescript +const universalExact = [ + // Generic tracking + 'assetType', 'elqTrack', 'mkt_tok', 'originalReferer', 'referrer', + 'terminal_id', 'trk', 'trkCampaign', 'trkInfo', + // *id suffix params (expanded) + 'anid', 'assetid', 'campaignid', 'eid', 'gclid', 'recipientid', 'siteid', + // Facebook (when fb_ or fbclid present) + 'fbclid', 'hrc', 'refsrc', + // Google Analytics gcl* + 'gclsrc', + // HubSpot + '_hsenc', '_hsmi', + // MailChimp + 'mc_cid', 'mc_eid', + // Marketo + 'iesrc', +]; +``` + +### Prefix Matches +```typescript +const universalPrefixes = [ + // Facebook + 'fb_', // fb_action_ids, fb_action_types, fb_ref, fb_source + 'action_', // action_object_map, action_ref_map, action_type_map + // Google Analytics + 'utm_', // utm_campaign, utm_cid, utm_content, utm_design, utm_medium, + // utm_name, utm_place, utm_pubreferrer, utm_reader, utm_source, + // utm_swu, utm_term, utm_userid, utm_viz_id + 'ga_', // ga_campaign, ga_cid, ga_content, etc. 
(same suffixes as utm_) + // HubSpot + 'hmb_', // hmb_campaign, hmb_medium, hmb_source + // Matomo + 'pk_', // pk_campaign, pk_content, pk_kwd, pk_medium, pk_source +]; +``` + +## Host-Specific Params + +### AliExpress (`hostStr.includes('aliexpress.')`) +```typescript +const aliexpressExact = [ + 'btsid', 'ws_ab_test', + 'spm', 'scm', // from s[cp]m + 'algo_evid', 'algo_pvid', // from algo_[ep]vid +]; +const aliexpressPrefixes = [ + 'aff_', // aff_platform, aff_trace_key +]; +``` + +### Amazon (`/(|\.)amazon\.com$/.test(hostStr)`) +```typescript +const amazonExact = [ + // Line 23: _encoding, ie, linkCode, linkId, pf, psc, ref_, tag + '_encoding', 'ie', 'linkCode', 'linkId', 'pf', 'psc', 'ref_', 'tag', + // Line 25: content-id, crid, cv_ct_cx, language, qid, sprefix, sr, th + 'content-id', 'crid', 'cv_ct_cx', 'language', 'qid', 'sprefix', 'sr', 'th', + // Line 26: asc(_campaign|_refurl|_source|subtag) + 'asc_campaign', 'asc_refurl', 'asc_source', 'ascsubtag', + // Line 27: dib(_tag)? + 'dib', 'dib_tag', + // Line 24: p[df]_rd_* (finite list from research) + 'pd_rd_i', 'pd_rd_r', 'pd_rd_w', 'pd_rd_wg', + 'pf_rd_i', 'pf_rd_m', 'pf_rd_p', 'pf_rd_r', 'pf_rd_s', 'pf_rd_t', +]; +``` + +### YouTube (`/(m|www)\.youtube\.com$/.test(hostStr) || hostStr === 'youtu.be' || hostStr === 'www.youtube-nocookie.com'`) +```typescript +const youtubeExact = [ + 'ac', 'annotation_id', 'app', 'feature', 'gclid', 'kw', 'src_vid', +]; +``` + +## Conditional Checks (Optimization) + +The original code uses `.includes()` checks before applying certain rules: +- `searchStr.includes('fb_')` → apply Facebook fb_* rules +- `searchStr.includes('action_')` → apply action_*_map rules +- `searchStr.toLowerCase().includes('id=')` → apply *id rules +- `searchStr.includes('ga_') || searchStr.includes('utm_')` → apply GA rules +- `/[?&]_hs(enc|mi)=/.test(searchStr)` → apply HubSpot _hs* rules +- `searchStr.includes('hmb_')` → apply HubSpot hmb_* rules +- `searchStr.includes('cm_')` → apply IBM cm_* 
rules (EXCLUDED) +- `/[?&]mc_[ce]id=/.test(searchStr)` → apply MailChimp rules +- `/[?&](iesrc|mkt_tok)=/.test(searchStr)` → apply Marketo rules +- `searchStr.includes('pk_')` → apply Matomo rules + +These can be preserved as optimizations or removed if URLSearchParams iteration is fast enough. + +## Excluded from Scope + +### IBM Coremetrics (`cm_mmca\d+`) +The numeric pattern `cm_mmca1`, `cm_mmca2`, etc. is **excluded** from this refactor. +- Keep original regex: `/([?&])cm_(mmc|mmca\d+|re|sp)=[^&]+/ig` +- Or remove entirely if IBM tracking is deprecated + +## Path Cleanup + +```typescript +// Remove /amp/ or /amp from end of pathname +pathStr = pathStr.replace(/\/amp\/?$/, ''); +``` + +## Post-Strip Cleanup (Current) + +The current implementation does manual string cleanup: +```typescript +searchStr = searchStr.replace(/&&+/g, '&').replace(/&$/, ''); +searchStr = searchStr[0] === '?' ? searchStr.replace(/^\?&/, '?') : `?${searchStr}`; +searchStr = searchStr.length < 3 ? '' : searchStr; +``` + +With URLSearchParams, this is **not needed** - `.toString()` produces clean output. 
+ +## URL Reconstruction + +```typescript +const newURL = `${location.protocol}//${location.host}${pathStr}${searchStr}${location.hash}`; +``` + +With URL object: +```typescript +url.pathname = cleanedPath; +url.search = params.toString(); +const newURL = url.href; // or url.toString() +``` diff --git a/package.json b/package.json index e06e65c..162943d 100644 --- a/package.json +++ b/package.json @@ -56,8 +56,8 @@ "build:minify": "node scripts/minify.js", "build:readme": "node scripts/update-readme.js", "deploy": "npm run build && npm run build:readme", - "test": "npm run build && npm run verify-build", + "test": "npm run build && node scripts/test-utmstrip.js && npm run verify-build", "verify-build": "node scripts/verify-build.js" }, - "version": "4.2.1" + "version": "4.2.2" } diff --git a/scripts/test-utmstrip.js b/scripts/test-utmstrip.js index b054ae0..148ec68 100644 --- a/scripts/test-utmstrip.js +++ b/scripts/test-utmstrip.js @@ -1,7 +1,7 @@ #!/usr/bin/env node /* eslint no-plusplus: 0 */ // cSpell:words Bronto Klaviyo Listrak Omeda -/* cSpell: ignore afid amdata anid ascsubtag assetid athancid athcgid athcpid athiession athmtid athpgid athposb athstid athznid btsid campaignid campid clkid crid customid dclid epik evid fbclid gclid gclsrc hsenc hsfp hsmi hssc hstc iesrc iflsig igsh igshid licu lipi mkcid mkevt mkrid mmca msclkid prmd pvid recipientid sclient siteid sourceid sprefix srsltid sxsrf ttclid twclid uact wmlspartner youtu */ +/* cSpell: ignore afid amdata anid ascsubtag assetid athancid athcgid athcpid athiession athmtid athpgid athposb athstid athznid btsid campaignid campid cjevent clkid crid customid dclid efid epik evid fbclid gclid gclsrc hsenc hsfp hsmi hssc hstc iesrc iflsig igsh igshid licu lipi mkcid mkevt mkrid mmca msclkid outbrain prmd pvid qclid rdtcid recipientid sclient sccid siteid sourceid sprefix srsltid sxsrf tblci ttclid twclid uact wmlspartner youtu */ /** * Test utmstrip.ts logic against test URL corpus * Run: node 
scripts/test-utmstrip.js @@ -44,7 +44,21 @@ const universalExact = [ // Pinterest 'epik', // Vero - 'vero_id' + 'vero_id', + // Reddit Ads + 'rdt_cid', + // Snapchat Ads + 'ScCid', + // Quora Ads + 'qclid', + // Taboola + 'tblci', + // CJ Affiliate + 'cjevent', + // Adobe Advertising + 'ef_id', + // Outbrain + 'outbrain_cid' ]; const universalPrefixes = [ @@ -119,7 +133,7 @@ const twitterExact = ['cn', 'ref_src', 'ref_url', 's', 't']; const walmartExact = [ 'adsredirect', 'affiliates_ad_id', 'athancid', 'athcgid', 'athcpid', 'athena', 'athiession', 'athmtid', 'athpgid', 'athposb', 'athstid', - 'athznid', 'campaign_id', 'wmlspartner' + 'athznid', 'campaign_id', 'wmlspartner', 'wt_mc' ]; diff --git a/src/utmstrip.ts b/src/utmstrip.ts index c4088ce..b714a06 100644 --- a/src/utmstrip.ts +++ b/src/utmstrip.ts @@ -1,5 +1,5 @@ /* eslint max-statements: ["error", 55] */ -/* cSpell: ignore afid amdata anid ascsubtag assetid athancid athcgid athcpid athiession athmtid athpgid athposb athstid athznid btsid campaignid campid clkid crid customid dclid epik evid fbclid gclid gclsrc hsenc hsfp hsmi hssc hstc iesrc iflsig igsh igshid licu lipi mkcid mkevt mkrid mmca msclkid prmd pvid recipientid sclient siteid sourceid sprefix srsltid sxsrf ttclid twclid uact wmlspartner youtu */ +/* cSpell: ignore afid amdata anid ascsubtag assetid athancid athcgid athcpid athiession athmtid athpgid athposb athstid athznid btsid campaignid campid cjevent clkid crid customid dclid efid epik evid fbclid gclid gclsrc hsenc hsfp hsmi hssc hstc iesrc iflsig igsh igshid licu lipi mkcid mkevt mkrid mmca msclkid outbrain prmd pvid qclid Quora rdtcid recipientid sclient sccid siteid sourceid sprefix srsltid sxsrf Taboola tblci ttclid twclid uact wmlspartner youtu */ (() => { const locPath: string = location.pathname, @@ -47,7 +47,21 @@ // Pinterest 'epik', // Vero - 'vero_id' + 'vero_id', + // Reddit Ads + 'rdt_cid', + // Snapchat Ads + 'ScCid', + // Quora Ads + 'qclid', + // Taboola + 'tblci', + // CJ 
Affiliate + 'cjevent', + // Adobe Advertising + 'ef_id', + // Outbrain + 'outbrain_cid' ]; // Universal prefix params to strip diff --git a/tests/utmstrip-test-urls.json b/tests/utmstrip-test-urls.json index abc0c4f..ea14a63 100644 --- a/tests/utmstrip-test-urls.json +++ b/tests/utmstrip-test-urls.json @@ -442,6 +442,111 @@ "input": "https://example.com/page?msclkid=abc&igsh=def&_kx=ghi&epik=jkl&vero_id=mno", "expected": "https://example.com/page", "note": "Multiple new universal tracking params in one URL" + }, + { + "name": "Walmart wt_mc param", + "category": "walmart", + "input": "https://www.walmart.com/ip/12345?wt_mc=test&price=9.99", + "expected": "https://www.walmart.com/ip/12345?price=9.99", + "note": "wt_mc should be stripped, price preserved" + }, + { + "name": "Case-insensitive stripping", + "category": "generic", + "input": "https://example.com/page?UTM_SOURCE=google&UTM_MEDIUM=cpc&q=test", + "expected": "https://example.com/page?q=test", + "note": "Uppercase UTM params should be stripped via lowercase normalization" + }, + { + "name": "LinkedIn midSig and midToken", + "category": "linkedin", + "input": "https://www.linkedin.com/posts/user?midSig=abc123&midToken=def456&refId=xyz", + "expected": "https://www.linkedin.com/posts/user", + "note": "LinkedIn midSig, midToken, and refId should all be stripped" + }, + { + "name": "AliExpress regional domain (.ru)", + "category": "aliexpress", + "input": "https://www.aliexpress.ru/item/123.html?spm=a2g0o&scm=1007.123", + "expected": "https://www.aliexpress.ru/item/123.html", + "note": "AliExpress non-.com TLD should match /\\.aliexpress\\.[a-z]{2,3}$/ regex" + }, + { + "name": "eBay UK domain (ebay.co.uk)", + "category": "ebay", + "input": "https://www.ebay.co.uk/itm/12345?mkevt=1&mkcid=1&mkrid=abc", + "expected": "https://www.ebay.co.uk/itm/12345", + "note": "eBay co.uk domain should match /\\.ebay\\.co\\.[a-z]{2}$/ regex" + }, + { + "name": "YouTube nocookie domain", + "category": "youtube", + "input": 
"https://www.youtube-nocookie.com/embed/abc?feature=share", + "expected": "https://www.youtube-nocookie.com/embed/abc", + "note": "youtube-nocookie.com should have YouTube params stripped" + }, + { + "name": "IBM cm_mmca numeric pattern", + "category": "ibm", + "input": "https://example.com/?cm_mmca1=abc&cm_mmca99=xyz", + "expected": "https://example.com/", + "note": "cm_mmca\\d+ regex pattern should strip both params" + }, + { + "name": "Walmart hash preserved with tracking", + "category": "walmart", + "input": "https://www.walmart.com/ip/12345?wt_mc=test&athcpid=abc#reviews", + "expected": "https://www.walmart.com/ip/12345#reviews", + "note": "Hash fragment should be preserved when Walmart tracking params are stripped" + }, + { + "name": "Reddit Ads click ID", + "category": "ad-platforms", + "input": "https://example.com/page?rdt_cid=abc123", + "expected": "https://example.com/page", + "note": "Reddit Ads rdt_cid should be stripped" + }, + { + "name": "Snapchat click ID", + "category": "ad-platforms", + "input": "https://example.com/page?ScCid=abc123", + "expected": "https://example.com/page", + "note": "Snapchat ScCid should be stripped (case-insensitive match)" + }, + { + "name": "Quora click ID", + "category": "ad-platforms", + "input": "https://example.com/page?qclid=abc123", + "expected": "https://example.com/page", + "note": "Quora Ads qclid should be stripped" + }, + { + "name": "Taboola click ID", + "category": "ad-platforms", + "input": "https://example.com/page?tblci=abc123", + "expected": "https://example.com/page", + "note": "Taboola tblci should be stripped" + }, + { + "name": "CJ Affiliate event ID", + "category": "ad-platforms", + "input": "https://example.com/page?cjevent=abc123", + "expected": "https://example.com/page", + "note": "CJ Affiliate cjevent should be stripped" + }, + { + "name": "Adobe Advertising ef_id", + "category": "ad-platforms", + "input": "https://example.com/page?ef_id=abc123", + "expected": "https://example.com/page", + 
"note": "Adobe Advertising Cloud ef_id should be stripped" + }, + { + "name": "Outbrain click ID", + "category": "ad-platforms", + "input": "https://example.com/page?outbrain_cid=abc123", + "expected": "https://example.com/page", + "note": "Outbrain outbrain_cid should be stripped" } ], "excludedFromScope": [ From 8ddf3c3f45069569b656e86055f3a3578d531b22 Mon Sep 17 00:00:00 2001 From: Tom King Date: Sat, 7 Mar 2026 15:54:10 -0800 Subject: [PATCH 2/7] fix: reduce "tracking-params-check.yml" yamllint warnings --- .github/workflows/tracking-params-check.yml | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) diff --git a/.github/workflows/tracking-params-check.yml b/.github/workflows/tracking-params-check.yml index 270e371..3b284ff 100644 --- a/.github/workflows/tracking-params-check.yml +++ b/.github/workflows/tracking-params-check.yml @@ -1,6 +1,7 @@ +--- name: Tracking Parameter Monitor -on: +"on": schedule: - cron: '0 0 1 * *' # 1st of each month at midnight UTC workflow_dispatch: # allow manual trigger @@ -25,11 +26,11 @@ jobs: id: compare run: | # Extract parameter names from DuckDuckGo JSON - # The DDG JSON has parameters as an array of objects with a "parameter" key + # DDG JSON has param as an array of objects w/ a "parameter" key jq -r '.settings.parameters[].parameter' /tmp/ddg-params.json \ | sort > /tmp/ddg-list.txt - # Extract quoted string values from utmstrip.ts (single-quoted identifiers) + # Extract quoted strings from utmstrip.ts (single-quoted identifiers) # Matches 'param_name' patterns, strips quotes, excludes TypeScript types grep -oE "'[a-zA-Z_][a-zA-Z0-9_]*'" src/utmstrip.ts \ | tr -d "'" | sort -u > /tmp/utmstrip-list.txt @@ -60,14 +61,14 @@ jobs: body: [ '## Monthly Tracking Parameter Check', '', - 'The following parameters appear in DuckDuckGo\'s tracking protection list', - 'but are not present in `src/utmstrip.ts`:', + 'The following parameters appear in DuckDuckGo\'s tracking', + 'protection list but are not present in 
`src/utmstrip.ts`:', '', '```', process.env.MISSING_PARAMS, '```', '', - 'Please review these against the quarterly maintenance checklist in', - '`docs/Maintenance-Plan-UtmStrip.md` and determine if they should be added.', + 'Review these against the quarterly maintenance checklist in', + '`docs/Maintenance-Plan-UtmStrip.md` and decide which to add.', ].join('\n') }); From 7bfdfd75ef34a454ecfa03b4b5ce34a63e0b6ad5 Mon Sep 17 00:00:00 2001 From: Tom King Date: Sat, 7 Mar 2026 20:58:19 -0700 Subject: [PATCH 3/7] fix(Markdown): fix formatting warnings --- docs/Maintenance-Plan-UtmStrip.md | 51 ++++++++++++++++--------------- 1 file changed, 26 insertions(+), 25 deletions(-) diff --git a/docs/Maintenance-Plan-UtmStrip.md b/docs/Maintenance-Plan-UtmStrip.md index f349673..6fb2a50 100644 --- a/docs/Maintenance-Plan-UtmStrip.md +++ b/docs/Maintenance-Plan-UtmStrip.md @@ -53,7 +53,8 @@ process to keep `src/utmstrip.ts` current and the test infrastructure in sync. ## After Making Changes -1. Update `tests/utmstrip-test-urls.json` (add test cases **before** code changes) +1. Update `tests/utmstrip-test-urls.json` (add test cases **before** code + changes) 2. Update `scripts/test-utmstrip.js` to match new `src/utmstrip.ts` arrays 3. Update `src/utmstrip.ts` with the new parameters 4. Update `/* cSpell: ignore ... */` comments in both `src/utmstrip.ts` and @@ -66,15 +67,15 @@ process to keep `src/utmstrip.ts` current and the test infrastructure in sync. ## Quarterly Review Schedule -| Approx. Timing | Focus | -| -------------- | --------------------------------------------------------- | -| Q1 - January | Post-holiday changes; new January GA4 params | -| Q2 - April | Spring marketing; browser spring privacy updates | -| Q3 - July | Mid-year ad platform changes; iOS/Safari summer betas | -| Q4 - October | Pre-holiday season; browser major releases; platform launches | +| Approx. 
Timing | Focus | +| -------------- | ------------------------------------------------------ | +| Q1 - January | Post-holiday changes; new January GA4 params | +| Q2 - April | Spring marketing; browser spring privacy updates | +| Q3 - July | Mid-year ad platform changes; iOS/Safari summer betas | +| Q4 - October | Pre-holiday; browser major releases; platform launches | -Each review should take approximately 1 hour: 30 min research, 20 min code/test changes, -10 min PR. +Each review should take approximately 1 hour: 30 min research, 20 min +code/test changes, 10 min PR. --- @@ -84,25 +85,25 @@ Track decisions here to avoid re-researching the same parameters each quarter. ### Added Parameters (with rationale) -| Parameter | Platform | Added | Rationale | -| --------- | ------------ | ----- | --------------------------------------- | -| `rdt_cid` | Reddit Ads | 2026-Q1 | Reddit is now a top-5 ad platform; confirmed in DDG list | -| `ScCid` | Snapchat Ads | 2026-Q1 | Major mobile ad platform; confirmed in DDG list | -| `qclid` | Quora Ads | 2026-Q1 | Confirmed in official Quora Ads docs | -| `tblci` | Taboola | 2026-Q1 | Native ad click ID; appears on news/media sites widely | -| `cjevent` | CJ Affiliate | 2026-Q1 | Commission Junction; one of the largest affiliate networks | -| `ef_id` | Adobe Advertising | 2026-Q1 | Adobe Advertising Cloud redirect ID; common in enterprise | -| `outbrain_cid` | Outbrain | 2026-Q1 | Outbrain native ad click ID; appears alongside Taboola | +| Parameter | Platform | Added | Rationale | +| -------------- | ------------- | ----- | ------------------------------------- | +| `rdt_cid` | Reddit Ads | 2026-Q1 | A top-5 ad platform; in DDG list | +| `ScCid` | Snapchat Ads | 2026-Q1 | Major mobile ad platform; in DDG list | +| `qclid` | Quora Ads | 2026-Q1 | Confirmed in official Quora Ads docs | +| `tblci` | Taboola | 2026-Q1 | Native ad appears on news/media sites | +| `cjevent` | CJ Affiliate | 2026-Q1 | Commission Junction; large affiliate 
| +| `ef_id` | Adobe | 2026-Q1 | Adobe Advertising Cloud redirect ID | +| `outbrain_cid` | Outbrain | 2026-Q1 | Native ad ID; often beside Taboola | ### Skipped Parameters (with reasoning) -| Parameter | Reason to Skip | -| ----------------- | ------------------------------------------------------ | -| `click_id` | Too generic — not safely stripped universally | -| `distinct_id` | Mixpanel user ID — functional, could break sites | -| `branch_match_id` | Branch.io deep linking — used for native apps, less on web | -| `c_n`, `c_p`, `c_t` | Matomo content tracking — specialized params rare in shared URLs | -| `mytarget_click_id` | VK/Mail.ru Russian platform — very regional, low impact for most users | +| Parameter | Reason to Skip | +| ------------------- | ------------------------------------------------------- | +| `click_id` | Too generic — not safely stripped universally | +| `distinct_id` | Mixpanel user ID — functional, could break sites | +| `branch_match_id` | Branch.io linking — used for native apps, less on web | +| `c_n`, `c_p`, `c_t` | Matomo tracking — specialized, rare in shared URLs | +| `mytarget_click_id` | VK/Mail.ru Russian platform — very regional, low impact | --- From 660e2ad147f00578cf346c0fef2f6a6199d0f5b1 Mon Sep 17 00:00:00 2001 From: Tom King Date: Sat, 7 Mar 2026 21:04:37 -0700 Subject: [PATCH 4/7] fix: more Markdown formatting fixes --- docs/Maintenance-Plan-UtmStrip.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Maintenance-Plan-UtmStrip.md b/docs/Maintenance-Plan-UtmStrip.md index 6fb2a50..48601f5 100644 --- a/docs/Maintenance-Plan-UtmStrip.md +++ b/docs/Maintenance-Plan-UtmStrip.md @@ -74,8 +74,8 @@ process to keep `src/utmstrip.ts` current and the test infrastructure in sync. 
| Q3 - July | Mid-year ad platform changes; iOS/Safari summer betas | | Q4 - October | Pre-holiday; browser major releases; platform launches | -Each review should take approximately 1 hour: 30 min research, 20 min -code/test changes, 10 min PR. +Each review should take approximately 1 hour: 30 min research, 20 min code +& test changes, 10 min PR. --- From 128f6a904dd4d447e07bae717476938fd4964a6b Mon Sep 17 00:00:00 2001 From: Tom King Date: Sat, 7 Mar 2026 21:17:55 -0700 Subject: [PATCH 5/7] docs: fix Markdown formatting --- docs/utmstrip-impl-reference.md | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/docs/utmstrip-impl-reference.md b/docs/utmstrip-impl-reference.md index eadbb63..1abe71f 100644 --- a/docs/utmstrip-impl-reference.md +++ b/docs/utmstrip-impl-reference.md @@ -5,6 +5,7 @@ This file contains extracted param lists for the refactored implementation. ## Universal Params (All Hosts) ### Exact Matches + ```typescript const universalExact = [ // Generic tracking @@ -26,6 +27,7 @@ const universalExact = [ ``` ### Prefix Matches + ```typescript const universalPrefixes = [ // Facebook @@ -46,6 +48,7 @@ const universalPrefixes = [ ## Host-Specific Params ### AliExpress (`hostStr.includes('aliexpress.')`) + ```typescript const aliexpressExact = [ 'btsid', 'ws_ab_test', @@ -58,6 +61,7 @@ const aliexpressPrefixes = [ ``` ### Amazon (`/(|\.)amazon\.com$/.test(hostStr)`) + ```typescript const amazonExact = [ // Line 23: _encoding, ie, linkCode, linkId, pf, psc, ref_, tag @@ -75,6 +79,7 @@ const amazonExact = [ ``` ### YouTube (`/(m|www)\.youtube\.com$/.test(hostStr) || hostStr === 'youtu.be' || hostStr === 'www.youtube-nocookie.com'`) + ```typescript const youtubeExact = [ 'ac', 'annotation_id', 'app', 'feature', 'gclid', 'kw', 'src_vid', @@ -84,6 +89,7 @@ const youtubeExact = [ ## Conditional Checks (Optimization) The original code uses `.includes()` checks before applying certain rules: + - `searchStr.includes('fb_')` → apply Facebook fb_* rules - 
`searchStr.includes('action_')` → apply action_*_map rules - `searchStr.toLowerCase().includes('id=')` → apply *id rules @@ -100,7 +106,9 @@ These can be preserved as optimizations or removed if URLSearchParams iteration ## Excluded from Scope ### IBM Coremetrics (`cm_mmca\d+`) + The numeric pattern `cm_mmca1`, `cm_mmca2`, etc. is **excluded** from this refactor. + - Keep original regex: `/([?&])cm_(mmc|mmca\d+|re|sp)=[^&]+/ig` - Or remove entirely if IBM tracking is deprecated @@ -114,6 +122,7 @@ pathStr = pathStr.replace(/\/amp\/?$/, ''); ## Post-Strip Cleanup (Current) The current implementation does manual string cleanup: + ```typescript searchStr = searchStr.replace(/&&+/g, '&').replace(/&$/, ''); searchStr = searchStr[0] === '?' ? searchStr.replace(/^\?&/, '?') : `?${searchStr}`; @@ -129,6 +138,7 @@ const newURL = `${location.protocol}//${location.host}${pathStr}${searchStr}${lo ``` With URL object: + ```typescript url.pathname = cleanedPath; url.search = params.toString(); From afff7faf63719f9fa3053d1afde5d175a36455c3 Mon Sep 17 00:00:00 2001 From: Tom King Date: Sat, 7 Mar 2026 22:31:21 -0700 Subject: [PATCH 6/7] docs: remove refactoring doc --- docs/utmstrip-impl-reference.md | 146 -------------------------------- 1 file changed, 146 deletions(-) delete mode 100644 docs/utmstrip-impl-reference.md diff --git a/docs/utmstrip-impl-reference.md b/docs/utmstrip-impl-reference.md deleted file mode 100644 index 1abe71f..0000000 --- a/docs/utmstrip-impl-reference.md +++ /dev/null @@ -1,146 +0,0 @@ -# UTMStrip Implementation Reference - -This file contains extracted param lists for the refactored implementation. 
- -## Universal Params (All Hosts) - -### Exact Matches - -```typescript -const universalExact = [ - // Generic tracking - 'assetType', 'elqTrack', 'mkt_tok', 'originalReferer', 'referrer', - 'terminal_id', 'trk', 'trkCampaign', 'trkInfo', - // *id suffix params (expanded) - 'anid', 'assetid', 'campaignid', 'eid', 'gclid', 'recipientid', 'siteid', - // Facebook (when fb_ or fbclid present) - 'fbclid', 'hrc', 'refsrc', - // Google Analytics gcl* - 'gclsrc', - // HubSpot - '_hsenc', '_hsmi', - // MailChimp - 'mc_cid', 'mc_eid', - // Marketo - 'iesrc', -]; -``` - -### Prefix Matches - -```typescript -const universalPrefixes = [ - // Facebook - 'fb_', // fb_action_ids, fb_action_types, fb_ref, fb_source - 'action_', // action_object_map, action_ref_map, action_type_map - // Google Analytics - 'utm_', // utm_campaign, utm_cid, utm_content, utm_design, utm_medium, - // utm_name, utm_place, utm_pubreferrer, utm_reader, utm_source, - // utm_swu, utm_term, utm_userid, utm_viz_id - 'ga_', // ga_campaign, ga_cid, ga_content, etc. 
(same suffixes as utm_) - // HubSpot - 'hmb_', // hmb_campaign, hmb_medium, hmb_source - // Matomo - 'pk_', // pk_campaign, pk_content, pk_kwd, pk_medium, pk_source -]; -``` - -## Host-Specific Params - -### AliExpress (`hostStr.includes('aliexpress.')`) - -```typescript -const aliexpressExact = [ - 'btsid', 'ws_ab_test', - 'spm', 'scm', // from s[cp]m - 'algo_evid', 'algo_pvid', // from algo_[ep]vid -]; -const aliexpressPrefixes = [ - 'aff_', // aff_platform, aff_trace_key -]; -``` - -### Amazon (`/(|\.)amazon\.com$/.test(hostStr)`) - -```typescript -const amazonExact = [ - // Line 23: _encoding, ie, linkCode, linkId, pf, psc, ref_, tag - '_encoding', 'ie', 'linkCode', 'linkId', 'pf', 'psc', 'ref_', 'tag', - // Line 25: content-id, crid, cv_ct_cx, language, qid, sprefix, sr, th - 'content-id', 'crid', 'cv_ct_cx', 'language', 'qid', 'sprefix', 'sr', 'th', - // Line 26: asc(_campaign|_refurl|_source|subtag) - 'asc_campaign', 'asc_refurl', 'asc_source', 'ascsubtag', - // Line 27: dib(_tag)? 
- 'dib', 'dib_tag', - // Line 24: p[df]_rd_* (finite list from research) - 'pd_rd_i', 'pd_rd_r', 'pd_rd_w', 'pd_rd_wg', - 'pf_rd_i', 'pf_rd_m', 'pf_rd_p', 'pf_rd_r', 'pf_rd_s', 'pf_rd_t', -]; -``` - -### YouTube (`/(m|www)\.youtube\.com$/.test(hostStr) || hostStr === 'youtu.be' || hostStr === 'www.youtube-nocookie.com'`) - -```typescript -const youtubeExact = [ - 'ac', 'annotation_id', 'app', 'feature', 'gclid', 'kw', 'src_vid', -]; -``` - -## Conditional Checks (Optimization) - -The original code uses `.includes()` checks before applying certain rules: - -- `searchStr.includes('fb_')` → apply Facebook fb_* rules -- `searchStr.includes('action_')` → apply action_*_map rules -- `searchStr.toLowerCase().includes('id=')` → apply *id rules -- `searchStr.includes('ga_') || searchStr.includes('utm_')` → apply GA rules -- `/[?&]_hs(enc|mi)=/.test(searchStr)` → apply HubSpot _hs* rules -- `searchStr.includes('hmb_')` → apply HubSpot hmb_* rules -- `searchStr.includes('cm_')` → apply IBM cm_* rules (EXCLUDED) -- `/[?&]mc_[ce]id=/.test(searchStr)` → apply MailChimp rules -- `/[?&](iesrc|mkt_tok)=/.test(searchStr)` → apply Marketo rules -- `searchStr.includes('pk_')` → apply Matomo rules - -These can be preserved as optimizations or removed if URLSearchParams iteration is fast enough. - -## Excluded from Scope - -### IBM Coremetrics (`cm_mmca\d+`) - -The numeric pattern `cm_mmca1`, `cm_mmca2`, etc. is **excluded** from this refactor. - -- Keep original regex: `/([?&])cm_(mmc|mmca\d+|re|sp)=[^&]+/ig` -- Or remove entirely if IBM tracking is deprecated - -## Path Cleanup - -```typescript -// Remove /amp/ or /amp from end of pathname -pathStr = pathStr.replace(/\/amp\/?$/, ''); -``` - -## Post-Strip Cleanup (Current) - -The current implementation does manual string cleanup: - -```typescript -searchStr = searchStr.replace(/&&+/g, '&').replace(/&$/, ''); -searchStr = searchStr[0] === '?' ? searchStr.replace(/^\?&/, '?') : `?${searchStr}`; -searchStr = searchStr.length < 3 ? 
'' : searchStr; -``` - -With URLSearchParams, this is **not needed** - `.toString()` produces clean output. - -## URL Reconstruction - -```typescript -const newURL = `${location.protocol}//${location.host}${pathStr}${searchStr}${location.hash}`; -``` - -With URL object: - -```typescript -url.pathname = cleanedPath; -url.search = params.toString(); -const newURL = url.href; // or url.toString() -``` From 4d671632978e4078ecacc55f66e41bd066e9b7ba Mon Sep 17 00:00:00 2001 From: Tom King Date: Sat, 7 Mar 2026 22:47:29 -0700 Subject: [PATCH 7/7] =?UTF-8?q?docs:=20formatting,=20make=20lines=20?= =?UTF-8?q?=E2=89=A4=2080=20chars?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docs/Maintenance-Plan-UtmStrip.md | 56 +++++++++++++------------------ 1 file changed, 24 insertions(+), 32 deletions(-) diff --git a/docs/Maintenance-Plan-UtmStrip.md b/docs/Maintenance-Plan-UtmStrip.md index 48601f5..04153d5 100644 --- a/docs/Maintenance-Plan-UtmStrip.md +++ b/docs/Maintenance-Plan-UtmStrip.md @@ -7,28 +7,28 @@ click IDs, email platforms rename parameters, and browsers add their own stripping that shifts expectations. This guide documents a quarterly review process to keep `src/utmstrip.ts` current and the test infrastructure in sync. ---- - ## Quarterly Review Checklist ### Reference Sources to Check 1. **DuckDuckGo** — -2. **ClearURLs** — (or GitHub rules file) -3. **Firefox ETP** — search for "query parameter stripping" +2. **ClearURLs** — (or GitHub + rules file) +3. **Firefox ETP** — search for "query parameter + stripping" 4. **Brave** — ### Review Steps 1. Fetch each reference source (links above) -2. Extract parameter names → compare against `universalExact`, `universalPrefixes`, - and host-specific arrays in `src/utmstrip.ts` +2. Extract parameter names → compare against `universalExact`, + `universalPrefixes`, and host-specific arrays in `src/utmstrip.ts` 3. 
For each param in reference sources but NOT in `utmstrip.ts`: research it → decide add/skip 4. For each param in `utmstrip.ts` NOT in any reference source: research if deprecated → decide remove -5. Update `src/utmstrip.ts` + `scripts/test-utmstrip.js` + `tests/utmstrip-test-urls.json` - together (all three files must stay in sync) +5. Update `src/utmstrip.ts` + `scripts/test-utmstrip.js` + + `tests/utmstrip-test-urls.json` together (all three files must stay in sync) ### Decision Criteria @@ -49,8 +49,6 @@ process to keep `src/utmstrip.ts` current and the test infrastructure in sync. - Rare but real (confirmed real-world examples), even if not in reference sources ---- - ## After Making Changes 1. Update `tests/utmstrip-test-urls.json` (add test cases **before** code @@ -63,8 +61,6 @@ process to keep `src/utmstrip.ts` current and the test infrastructure in sync. 6. Run: `npm run deploy` 7. File a PR; CI will validate the full build + tests ---- - ## Quarterly Review Schedule | Approx. Timing | Focus | @@ -77,35 +73,31 @@ process to keep `src/utmstrip.ts` current and the test infrastructure in sync. Each review should take approximately 1 hour: 30 min research, 20 min code & test changes, 10 min PR. ---- - ## Parameter Decision Log Track decisions here to avoid re-researching the same parameters each quarter. 
### Added Parameters (with rationale) -| Parameter | Platform | Added | Rationale | -| -------------- | ------------- | ----- | ------------------------------------- | -| `rdt_cid` | Reddit Ads | 2026-Q1 | A top-5 ad platform; in DDG list | -| `ScCid` | Snapchat Ads | 2026-Q1 | Major mobile ad platform; in DDG list | -| `qclid` | Quora Ads | 2026-Q1 | Confirmed in official Quora Ads docs | -| `tblci` | Taboola | 2026-Q1 | Native ad appears on news/media sites | -| `cjevent` | CJ Affiliate | 2026-Q1 | Commission Junction; large affiliate | -| `ef_id` | Adobe | 2026-Q1 | Adobe Advertising Cloud redirect ID | -| `outbrain_cid` | Outbrain | 2026-Q1 | Native ad ID; often beside Taboola | +| Parameter | Platform | Added | Rationale | +| -------------- | ------------ | ----- | ------------------------------- | +| `rdt_cid` | Reddit Ads | 2026-Q1 | A top-5 ad platform in DDG list | +| `ScCid` | Snapchat Ads | 2026-Q1 | Major mobile ad platform in DDG | +| `qclid` | Quora Ads | 2026-Q1 | Confirmed in Quora Ads docs | +| `tblci` | Taboola | 2026-Q1 | Native ads on news/media sites | +| `cjevent` | CJ Affiliate | 2026-Q1 | Commission Junction affiliates | +| `ef_id` | Adobe | 2026-Q1 | Adobe Advertising Cloud IDs | +| `outbrain_cid` | Outbrain | 2026-Q1 | Native ad ID | ### Skipped Parameters (with reasoning) -| Parameter | Reason to Skip | -| ------------------- | ------------------------------------------------------- | -| `click_id` | Too generic — not safely stripped universally | -| `distinct_id` | Mixpanel user ID — functional, could break sites | -| `branch_match_id` | Branch.io linking — used for native apps, less on web | -| `c_n`, `c_p`, `c_t` | Matomo tracking — specialized, rare in shared URLs | -| `mytarget_click_id` | VK/Mail.ru Russian platform — very regional, low impact | - ---- +| Parameter | Reason to Skip | +| ------------------- | ------------------------------------------- | +| `click_id` | Too generic — unsafe to strip universally | +| `distinct_id` | 
Mixpanel ID — functional, could break sites | +| `branch_match_id` | Branch.io linking used for native apps | +| `c_n`, `c_p`, `c_t` | Matomo tracking — rare in shared URLs | +| `mytarget_click_id` | VK/Mail.ru regional Russian platform | ## Automated Monitoring