From ca5b8c99a56e9a376faadd6a73d6bbcc0f308f2d Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Mon, 16 Feb 2026 01:37:05 +0000
Subject: [PATCH 1/2] setnames(): add option to warn when duplicate column names are created (#4044)

---
 R/data.table.R        |  6 ++++++
 R/onLoad.R            |  3 ++-
 R/utils.R             | 10 ++++++++++
 inst/tests/tests.Rraw |  9 +++++++++
 4 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/R/data.table.R b/R/data.table.R
index 85d623d392..cca9538749 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -2940,6 +2940,12 @@ setnames = function(x,old,new,skip_absent=FALSE) {
     if (!length(new)) return(invisible(x)) # no changes
     if (length(i) != length(new)) internal_error("length(i)!=length(new)") # nocov
   }
+
+  # NEW: Check for duplicates using the centralized helper in utils.R
+  full_names = names(x)
+  full_names[i] = new
+  warn_if_duplicate_names(full_names)
+
   # update the key if the column name being change is in the key
   m = chmatch(names(x)[i], key(x))
   w = which(!is.na(m))
diff --git a/R/onLoad.R b/R/onLoad.R
index b72fee4d1b..e5f54ca794 100644
--- a/R/onLoad.R
+++ b/R/onLoad.R
@@ -98,7 +98,8 @@
     datatable.auto.index=TRUE,          # DT[col=="val"] to auto add index so 2nd time faster
     datatable.use.index=TRUE,           # global switch to address #1422
     datatable.prettyprint.char=NULL,    # FR #1091
-    datatable.old.matrix.autoname=FALSE # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
+    datatable.old.matrix.autoname=FALSE, # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
+    datatable.warn.duplicate.names=FALSE # ADD THIS LINE FOR ISSUE #4044
   )
   opts = opts[!names(opts) %chin% names(options())]
   options(opts)
diff --git a/R/utils.R b/R/utils.R
index 9d89f6f0a4..83e1ef3a3b 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -35,6 +35,16 @@ check_duplicate_names = function(x, table_name=deparse(substitute(x))) {
         table_name, brackify(duplicate_names), domain=NA)
 }
 
+warn_if_duplicate_names = function(names_vec) {
+  # Use FALSE as the second argument so it defaults to OFF if not set
+  if (isTRUE(getOption("datatable.warn.duplicate.names", FALSE))) {
+    if (anyDuplicated(names_vec)) {
+      dups = unique(names_vec[duplicated(names_vec)])
+      warningf("Duplicate column names created: %s. This may cause ambiguity in future operations.", brackify(dups))
+    }
+  }
+}
+
 duplicated_values = function(x) {
   # fast anyDuplicated for the typical/non-error case; second duplicated() pass for (usually) error case
   if (!anyDuplicated(x)) return(vector(typeof(x)))
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index f30467dae7..99a433f651 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21515,3 +21515,12 @@ test(2365.1, melt(df_melt, id.vars=1:2), melt(dt_melt, id.vars=1:2))
 df_dcast = data.frame(a = c("x", "y"), b = 1:2, v = 3:4)
 dt_dcast = data.table(a = c("x", "y"), b = 1:2, v = 3:4)
 test(2365.2, dcast(df_dcast, a ~ b, value.var = "v"), dcast(dt_dcast, a ~ b, value.var = "v"))
+
+DT = as.data.table(iris)
+options(datatable.warn.duplicate.names = FALSE)
+test(2366.1,names({ tmp = copy(DT); setnames(tmp, "Petal.Length", "Sepal.Length"); tmp }),c("Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", "Species"))
+options(datatable.warn.duplicate.names = TRUE)
+test(2366.2,names({ tmp = copy(DT); setnames(tmp, "Petal.Length", "Sepal.Length"); tmp }),c("Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", "Species"),warning = "Duplicate column names created")
+test(2366.3,names({ tmp = copy(DT); setnames(tmp, "Petal.Length", "New.Length"); tmp }),c("Sepal.Length", "Sepal.Width", "New.Length", "Petal.Width", "Species"))
+options(datatable.warn.duplicate.names = FALSE)
+test(2366.4,names({ tmp = copy(DT); setnames(tmp, "Petal.Length", "Sepal.Length"); tmp }),c("Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", "Species"))

From 86993168f24bb5465da8a326df444199f1418657 Mon Sep 17 00:00:00 2001
From: venom1204 <venomplays1204@gmail.com>
Date: Wed, 25 Feb 2026 08:50:25 +0000
Subject: [PATCH 2/2] Replace datatable.warn.duplicate.names with datatable.unique.names policy; add NEWS and docs

---
 NEWS.md                   |  2 ++
 R/data.table.R            |  6 ++----
 R/onLoad.R                |  2 +-
 R/utils.R                 | 28 ++++++++++++++++++++++------
 inst/tests/tests.Rraw     | 16 +++++++++-------
 man/data.table-options.Rd |  6 ++++++
 6 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/NEWS.md b/NEWS.md
index fd0ee8bf16..6f80c2b410 100644
--- a/NEWS.md
+++ b/NEWS.md
@@ -28,6 +28,8 @@
 
 4. `dcast()` and `melt()` "just work" when passed a data.frame, not just data.tables, with no need for coercion, [#7614](https://github.com/Rdatatable/data.table/issues/7614). Thanks @MichaelChirico for the suggestion and @manmita for the PR. Note that to avoid potential conflicts with {reshape2}'s data.frame methods, we do the dispatch to the data.table method manually.
 
+5. `setnames()` now supports a global option `datatable.unique.names` to control the creation of duplicate column names. Users can choose between `"off"` (default), `"warn"`, `"error"`, or `"rename"`. This addresses long-standing ambiguity issues when duplicate names were created silently, [#4044](https://github.com/Rdatatable/data.table/issues/4044). Thanks to @venom1204 for the PR.
+
 ### BUG FIXES
 
 1. `fread()` with `skip=0` and `(header=TRUE|FALSE)` no longer skips the first row when it has fewer fields than subsequent rows, [#7463](https://github.com/Rdatatable/data.table/issues/7463). Thanks @emayerhofer for the report and @ben-schwen for the fix.
diff --git a/R/data.table.R b/R/data.table.R
index cca9538749..e2a5731e1b 100644
--- a/R/data.table.R
+++ b/R/data.table.R
@@ -2941,18 +2941,22 @@ setnames = function(x,old,new,skip_absent=FALSE) {
     if (length(i) != length(new)) internal_error("length(i)!=length(new)") # nocov
   }
 
-  # NEW: Check for duplicates using the centralized helper in utils.R
+  # apply options(datatable.unique.names=) to the names x would have after this call, #4044.
+  # Columns outside `i` are placed first so that, under "rename", make.unique() suffixes a column being
+  #   renamed: only full_names[i] is read back into `new`, so a suffix landing outside `i` would be lost.
   full_names = names(x)
   full_names[i] = new
-  warn_if_duplicate_names(full_names)
+  ord = c(setdiff(seq_along(full_names), i), i)
+  full_names[ord] = process_name_policy(full_names[ord])
+  new = full_names[i]
 
   # update the key if the column name being change is in the key
   m = chmatch(names(x)[i], key(x))
   w = which(!is.na(m))
   if (length(w))
     .Call(Csetcharvec, attr(x, "sorted", exact=TRUE), m[w], new[w])
 
   # update secondary keys
   idx = attr(x, "index", exact=TRUE)
   for (k in names(attributes(idx))) {
     tt = strsplit(k,split="__")[[1L]][-1L]
diff --git a/R/onLoad.R b/R/onLoad.R
index e5f54ca794..ba404305b4 100644
--- a/R/onLoad.R
+++ b/R/onLoad.R
@@ -99,7 +99,7 @@
     datatable.use.index=TRUE,           # global switch to address #1422
     datatable.prettyprint.char=NULL,    # FR #1091
     datatable.old.matrix.autoname=FALSE, # #7145: how data.table(x=1, matrix(1)) is auto-named set to change
-    datatable.warn.duplicate.names=FALSE # ADD THIS LINE FOR ISSUE #4044
+    datatable.unique.names = "off"      # #4044: duplicate-name policy applied by setnames(): "off", "warn", "error" or "rename"
   )
   opts = opts[!names(opts) %chin% names(options())]
   options(opts)
diff --git a/R/utils.R b/R/utils.R
index 83e1ef3a3b..e161074497 100644
--- a/R/utils.R
+++ b/R/utils.R
@@ -35,14 +35,32 @@ check_duplicate_names = function(x, table_name=deparse(substitute(x))) {
         table_name, brackify(duplicate_names), domain=NA)
 }
 
-warn_if_duplicate_names = function(names_vec) {
-  # Use FALSE as the second argument so it defaults to OFF if not set
-  if (isTRUE(getOption("datatable.warn.duplicate.names", FALSE))) {
-    if (anyDuplicated(names_vec)) {
-      dups = unique(names_vec[duplicated(names_vec)])
-      warningf("Duplicate column names created: %s. This may cause ambiguity in future operations.", brackify(dups))
-    }
-  }
+# Apply options(datatable.unique.names=) to a candidate vector of column names, #4044.
+#   "off" (the default, also used when the option is unset or NULL): names_vec is returned as-is
+#   "warn" / "error": warn / stop if names_vec contains duplicates; "warn" then returns it as-is
+#   "rename": duplicates are made unique with make.unique()
+#   anything else: warn about the invalid value and behave like "off"
+process_name_policy = function(names_vec) {
+  policy = getOption("datatable.unique.names", "off")
+  if (is.null(policy)) return(names_vec)
+
+  # require a single non-NA string before comparing: if() errors on NA or length>1 conditions
+  valid = is.character(policy) && length(policy) == 1L && !is.na(policy)
+  if (valid && policy == "off") return(names_vec)
+  if (!valid || !policy %in% c("warn", "error", "rename")) {
+    warningf("Invalid value for 'datatable.unique.names': [%s]. Falling back to 'off'. Allowed values are: 'off', 'warn', 'error', 'rename'.", toString(policy))
+    return(names_vec)
+  }
+
+  if (!anyDuplicated(names_vec)) return(names_vec)
+  if (policy == "rename") return(make.unique(names_vec))
+
+  # keep the template and its argument separate: with a pre-built message as the format,
+  #   a '%' inside a column name would be read as a conversion specification
+  dups = brackify(unique(names_vec[duplicated(names_vec)]))
+  if (policy == "error") stopf("Duplicate column names created: %s. This may cause ambiguity.", dups)
+  warningf("Duplicate column names created: %s. This may cause ambiguity.", dups)
+  names_vec
 }
 
 duplicated_values = function(x) {
diff --git a/inst/tests/tests.Rraw b/inst/tests/tests.Rraw
index 99a433f651..f481334d37 100644
--- a/inst/tests/tests.Rraw
+++ b/inst/tests/tests.Rraw
@@ -21516,11 +21516,12 @@ df_dcast = data.frame(a = c("x", "y"), b = 1:2, v = 3:4)
 dt_dcast = data.table(a = c("x", "y"), b = 1:2, v = 3:4)
 test(2365.2, dcast(df_dcast, a ~ b, value.var = "v"), dcast(dt_dcast, a ~ b, value.var = "v"))
 
+# setnames() honours options(datatable.unique.names=), #4044; options= keeps each setting local to its test
 DT = as.data.table(iris)
-options(datatable.warn.duplicate.names = FALSE)
-test(2366.1,names({ tmp = copy(DT); setnames(tmp, "Petal.Length", "Sepal.Length"); tmp }),c("Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", "Species"))
-options(datatable.warn.duplicate.names = TRUE)
-test(2366.2,names({ tmp = copy(DT); setnames(tmp, "Petal.Length", "Sepal.Length"); tmp }),c("Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", "Species"),warning = "Duplicate column names created")
-test(2366.3,names({ tmp = copy(DT); setnames(tmp, "Petal.Length", "New.Length"); tmp }),c("Sepal.Length", "Sepal.Width", "New.Length", "Petal.Width", "Species"))
-options(datatable.warn.duplicate.names = FALSE)
-test(2366.4,names({ tmp = copy(DT); setnames(tmp, "Petal.Length", "Sepal.Length"); tmp }),c("Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", "Species"))
+dup_nm = c("Sepal.Length", "Sepal.Width", "Sepal.Length", "Petal.Width", "Species")
+test(2366.1, names(setnames(copy(DT), "Petal.Length", "Sepal.Length")), dup_nm, options=c(datatable.unique.names="off"))
+test(2366.2, names(setnames(copy(DT), "Petal.Length", "Sepal.Length")), dup_nm, warning="Duplicate column names created", options=c(datatable.unique.names="warn"))
+test(2366.3, setnames(copy(DT), "Petal.Length", "Sepal.Length"), error="Duplicate column names created", options=c(datatable.unique.names="error"))
+test(2366.4, names(setnames(copy(DT), "Petal.Length", "Sepal.Length")), c("Sepal.Length", "Sepal.Width", "Sepal.Length.1", "Petal.Width", "Species"), options=c(datatable.unique.names="rename"))
+test(2366.5, names(setnames(copy(DT), "Petal.Length", "Sepal.Length")), dup_nm, warning="Invalid value for 'datatable.unique.names'", options=c(datatable.unique.names="bogus"))
+test(2366.6, names(setnames(copy(DT), "Petal.Length", "New.Length")), c("Sepal.Length", "Sepal.Width", "New.Length", "Petal.Width", "Species"), options=c(datatable.unique.names="error"))
diff --git a/man/data.table-options.Rd b/man/data.table-options.Rd
index 439e88ef2f..4bb477af64 100644
--- a/man/data.table-options.Rd
+++ b/man/data.table-options.Rd
@@ -105,6 +105,12 @@
     \item{\code{datatable.enlist}}{Experimental feature. Default is \code{NULL}. If set to a function
       (e.g., \code{list}), the \code{j} expression can return a \code{list}, which will then
       be "enlisted" into columns in the result.}
+    \item{\code{datatable.unique.names}}{A character string, default \code{"off"}.
+      Controls what \code{setnames} does when the requested names would leave the table
+      with duplicate column names. Can be \code{"off"} (silently allow duplicates),
+      \code{"warn"} (issue a warning), \code{"error"} (halt with an error),
+      or \code{"rename"} (automatically fix duplicates using \code{make.unique}).
+      Invalid values will trigger a warning and fall back to \code{"off"}.}
   }
 }