Coverage for mindsdb / api / executor / utilities / mysql_to_duckdb_functions.py: 9%
237 statements
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
« prev ^ index » next coverage.py v7.13.1, created at 2026-01-21 00:36 +0000
1import re
2from mindsdb_sql_parser.ast import Identifier, Function, Constant, BinaryOperation, Interval, ASTNode, UnaryOperation
5# ---- helper -----
8def cast(node: ASTNode, typename: str) -> BinaryOperation:
9 return BinaryOperation("::", args=[node, Identifier(typename)])
12def date_part(node, part):
13 """
14 Wrap element into DATE_PART function
16 Docs:
17 https://duckdb.org/docs/stable/sql/functions/date#date_partpart-date
18 """
19 node.args = apply_nested_functions(node.args)
21 if len(node.args) != 1:
22 raise ValueError(f"Wrong arguments: {node.args}")
24 return Function("DATE_PART", args=[Constant(part), cast(node.args[0], "date")])
27# ------------------------------
30def char_fn(node: Function) -> Function | None:
31 """Replace MySQL's multy-arg CHAR call to chain of DuckDB's CHR calls
33 Example:
34 CHAR(77, 78, 79) => CHR(77) || CHR(78) || CHR(79)
36 Args:
37 node (Function): Function node to adapt
39 Returns:
40 Function | None: Adapted function node
41 """
42 if len(node.args) == 1:
43 node.op = "chr"
44 return node
46 acc = None
47 for arg in node.args:
48 fn = Function(op="chr", args=[arg])
49 if acc is None:
50 acc = fn
51 continue
52 acc = BinaryOperation("||", args=[acc, fn])
54 acc.parentheses = True
55 acc.alias = node.alias
56 return acc
59def locate_fn(node: Function) -> Function | None:
60 """Replace MySQL's LOCATE (or INSTR) call to DuckDB's STRPOS call
62 Example:
63 LOCATE('bar', 'foobarbar') => STRPOS('foobarbar', 'bar')
64 INSTR('foobarbar', 'bar') => STRPOS('foobarbar', 'bar')
65 LOCATE('bar', 'foobarbar', 3) => ValueError (there is no analogue in DuckDB)
67 Args:
68 node (Function): Function node to adapt
70 Returns:
71 Function | None: Adapted function node
73 Raises:
74 ValueError: If the function has 3 arguments
75 """
76 if len(node.args) == 3:
77 raise ValueError("MySQL LOCATE function with 3 arguments is not supported")
78 if node.op == "locate":
79 node.args = [node.args[1], node.args[0]]
80 elif node.op == "insrt":
81 node.args = [node.args[0], node.args[1]]
82 node.op = "strpos"
85def unhex_fn(node: Function) -> None:
86 """Check MySQL's UNHEX function call arguments to ensure they are strings,
87 because DuckDB's UNHEX accepts only string arguments, while MySQL's UNHEX can accept integer arguments.
88 NOTE: if return dataframe from duckdb then unhex values are array - this may be an issue
90 Args:
91 node (Function): Function node to adapt
93 Returns:
94 None
96 Raises:
97 ValueError: If the function argument is not a string
98 """
99 for arg in node.args:
100 if not isinstance(arg, (str, bytes)):
101 raise ValueError("MySQL UNHEX function argument must be a string")
104def format_fn(node: Function) -> None:
105 """Adapt MySQL's FORMAT function to DuckDB's FORMAT function
107 Example:
108 FORMAT(1234567.89, 0) => FORMAT('{:,.0f}', 1234567.89)
109 FORMAT(1234567.89, 2) => FORMAT('{:,.2f}', 1234567.89)
110 FORMAT(name, 2) => FORMAT('{:,.2f}', name)
111 FORMAT('{:.2f}', 1234567.89) => FORMAT('{:,.2f}', 1234567.89) # no changes for original style
113 Args:
114 node (Function): Function node to adapt
116 Returns:
117 None
119 Raises:
120 ValueError: If MySQL's function has 3rd 'locale' argument, like FORMAT(12332.2, 2, 'de_DE')
121 """
122 match node.args[0], node.args[1]:
123 case Constant(value=(int() | float())), Constant(value=int()):
124 ...
125 case Identifier(), Constant(value=int()):
126 ...
127 case _:
128 return node
130 if len(node.args) > 2:
131 raise ValueError("'locale' argument of 'format' function is not supported")
132 decimal_places = node.args[1].value
134 if isinstance(node.args[0], Constant):
135 node.args[1].value = node.args[0].value
136 node.args[0].value = f"{{:,.{decimal_places}f}}"
137 else:
138 node.args[1] = node.args[0]
139 node.args[0] = Constant(f"{{:,.{decimal_places}f}}")
142def sha2_fn(node: Function) -> None:
143 """Adapt MySQL's SHA2 function to DuckDB's SHA256 function
145 Example:
146 SHA2('test', 256) => SHA256('test')
148 Args:
149 node (Function): Function node to adapt
151 Returns:
152 None
154 Raises:
155 ValueError: If the function has more than 1 argument or the argument is not 256
156 """
157 if len(node.args) > 1 and node.args[1].value != 256:
158 raise ValueError("Only sha256 is supported")
159 node.op = "sha256"
160 node.args = [node.args[0]]
163def length_fn(node: Function) -> None:
164 """Adapt MySQL's LENGTH function to DuckDB's STRLEN function
165 NOTE: duckdb also have LENGTH, therefore it can not be used
167 Example:
168 LENGTH('test') => STRLEN('test')
170 Args:
171 node (Function): Function node to adapt
173 Returns:
174 None
175 """
176 node.op = "strlen"
179def regexp_substr_fn(node: Function) -> None:
180 """Adapt MySQL's REGEXP_SUBSTR function to DuckDB's REGEXP_EXTRACT function
182 Example:
183 REGEXP_SUBSTR('foobarbar', 'bar', 1, 1) => REGEXP_EXTRACT('foobarbar', 'bar')
185 Args:
186 node (Function): Function node to adapt
188 Returns:
189 None
191 Raises:
192 ValueError: If the function has more than 2 arguments or 3rd or 4th argument is not 1
193 """
194 if (
195 len(node.args) == 3
196 and node.args[2].value != 1
197 or len(node.args) == 4
198 and (node.args[3].value != 1 or node.args[2].value != 1)
199 or len(node.args) > 4
200 ):
201 raise ValueError("Only 2 arguments are supported for REGEXP_SUBSTR function")
202 node.args = node.args[:2]
203 node.op = "regexp_extract"
206def substring_index_fn(node: Function) -> BinaryOperation | Function:
207 """Adapt MySQL's SUBSTRING_INDEX function to DuckDB's SPLIT_PART function
209 Example:
210 SUBSTRING_INDEX('a.b.c.d', '.', 1) => SPLIT_PART('a.b.c.d', '.', 1)
211 SUBSTRING_INDEX('a.b.c.d', '.', 2) => CONCAT_WS('.', SPLIT_PART('a.b.c.d', '.', 1), SPLIT_PART('a.b.c.d', '.', 2))
213 Args:
214 node (Function): Function node to adapt
216 Returns:
217 BinaryOperation | Function: Binary operation node or function node
219 Raises:
220 ValueError: If the function has more than 3 arguments or the 3rd argument is not 1
221 """
222 if len(node.args[1].value) > 1:
223 raise ValueError("Only one car in separator")
225 if node.args[2].value == 1:
226 node.op = "split_part"
227 return node
229 acc = [node.args[1]]
230 for i in range(node.args[2].value):
231 fn = Function(op="split_part", args=[node.args[0], node.args[1], Constant(i + 1)])
232 acc.append(fn)
234 acc = Function(op="concat_ws", args=acc)
235 acc.alias = node.alias
236 return acc
239def curtime_fn(node: Function) -> BinaryOperation:
240 """Adapt MySQL's CURTIME function to DuckDB's GET_CURRENT_TIME function.
241 To get the same type as MySQL's CURTIME function, we need to cast the result to time type.
243 Example:
244 CURTIME() => GET_CURRENT_TIME()::time
246 Args:
247 node (Function): Function node to adapt
249 Returns:
250 BinaryOperation: Binary operation node
251 """
252 return cast(Function(op="get_current_time", args=[]), "time")
255def timestampdiff_fn(node: Function) -> None:
256 """Adapt MySQL's TIMESTAMPDIFF function to DuckDB's DATE_DIFF function
257 NOTE: Looks like cast string args to timestamp works in most cases, but there may be some exceptions.
259 Example:
260 TIMESTAMPDIFF(YEAR, '2000-02-01', '2003-05-01') => DATE_DIFF('year', timestamp '2000-02-01', timestamp '2003-05-01')
262 Args:
263 node (Function): Function node to adapt
265 Returns:
266 None
267 """
268 node.op = "date_diff"
269 node.args[0] = Constant(node.args[0].parts[0])
270 node.args[1] = cast(node.args[1], "timestamp")
271 node.args[2] = cast(node.args[2], "timestamp")
274def extract_fn(node: Function) -> None:
275 """Adapt MySQL's EXTRACT function to DuckDB's EXTRACT function
276 TODO: multi-part args, like YEAR_MONTH, is not supported yet
277 NOTE: Looks like adding 'timestamp' works in most cases, but there may be some exceptions.
279 Example:
280 EXTRACT(YEAR FROM '2000-02-01') => EXTRACT('year' from timestamp '2000-02-01')
282 Args:
283 node (Function): Function node to adapt
285 Returns:
286 None
287 """
288 part = node.args[0].parts[0]
289 if part.upper() == "YEAR_MONTH":
290 node.args = apply_nested_functions([node.from_arg, Constant("%Y%m")])
291 node.from_arg = None
292 date_format_fn(node)
293 return cast(node, "int")
294 elif part.upper() == "DAY_MINUTE":
295 node.args = apply_nested_functions([node.from_arg, Constant("%e%H%i")])
296 node.from_arg = None
297 date_format_fn(node)
298 return cast(node, "int")
299 else:
300 node.args[0] = Constant(part)
301 if not isinstance(node.from_arg, Identifier):
302 node.from_arg = cast(node.from_arg, "timestamp")
305def get_format_fn(node: Function) -> Constant:
306 """
307 Replace function with a constant according to table:
308 Important! The parameters can be only constants.
310 Example: GET_FORMAT(DATE, 'USA') => '%m.%d.%Y'
312 Docs:
313 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_get-format
314 """
316 if len(node.args) != 2:
317 raise ValueError("MySQL GET_FORMAT supports only 2 arguments")
319 arg1, arg2 = node.args
321 if not isinstance(arg1, Identifier) and len(arg1.parts) != 1:
322 raise ValueError(f"Unknown type: {arg1}")
324 if not isinstance(arg2, Constant):
325 raise ValueError(f"Unknown format name: {arg2}")
327 match arg1.parts[0].upper(), arg2.value.upper():
328 case "DATE", "USA":
329 value = "%m.%d.%Y"
330 case "DATE", "JIS":
331 value = "%Y-%m-%d"
332 case "DATE", "ISO":
333 value = "%Y-%m-%d"
334 case "DATE", "EUR":
335 value = "%d.%m.%Y"
336 case "DATE", "INTERNAL":
337 value = "%Y%m%d"
339 case "DATETIME", "USA":
340 value = "%Y-%m-%d %H.%i.%s"
341 case "DATETIME", "JIS":
342 value = "%Y-%m-%d %H:%i:%s"
343 case "DATETIME", "ISO":
344 value = "%Y-%m-%d %H:%i:%s"
345 case "DATETIME", "EUR":
346 value = "%Y-%m-%d %H.%i.%s"
347 case "DATETIME", "INTERNAL":
348 value = "%Y%m%d%H%i%s"
350 case "TIME", "USA":
351 value = "%h:%i:%s %p"
352 case "TIME", "JIS":
353 value = "%H:%i:%s"
354 case "TIME", "ISO":
355 value = "%H:%i:%s"
356 case "TIME", "EUR":
357 value = "%H.%i.%s"
358 case "TIME", "INTERNAL":
359 value = "%H%i%s"
361 case _:
362 value = ""
364 return Constant(value)
367def date_format_fn(node: Function):
368 """
369 Adapt to strftime function and convert keys in format string.
371 DATE_FORMAT('2009-10-04 22:23:00', '%W %M %Y')
372 =>
373 strftime('2009-10-04 22:23:00'::datetime, '%A %B %Y')
375 Docs:
376 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_date-format
377 https://duckdb.org/docs/stable/sql/functions/timestamp.html#strftimetimestamp-format
378 https://duckdb.org/docs/stable/sql/functions/dateformat.html#format-specifiers
379 """
380 specifiers_map = {
381 "%c": "%-m", # Month, numeric (0..12) -> Month as decimal
382 "%D": "%-d", # Day with English suffix -> Day as decimal (no suffix in DuckDB)
383 "%e": "%-d", # Day of month (0..31) -> Day as decimal
384 "%h": "%I", # Hour (01..12)
385 "%i": "%M", # Minutes
386 "%j": "%j", # Day of year
387 "%k": "%-H", # Hour (0..23) -> Hour as decimal
388 "%l": "%-I", # Hour (1..12) -> Hour as decimal
389 "%M": "%B", # Month name -> Full month name
390 "%r": "%I:%M:%S %p", # Time, 12-hour
391 "%s": "%S", # Seconds
392 "%T": "%X", # Time, 24-hour
393 "%u": "%V", # Week, mode 1, Monday is first day, can be wrong in the edges of year
394 "%v": "%V", # Week, mode 3, Monday is first day
395 "%V": "%U", # Week, mode 2, Sunday is first day, can be wrong in the edges of year
396 "%W": "%A", # Weekday name -> Full weekday name
397 "%X": "%G", # Year for week
398 "%x": "%G", # Year for week
399 }
400 node.op = "strftime"
402 node.args = apply_nested_functions(node.args)
404 if len(node.args) != 2 or not isinstance(node.args[1], Constant):
405 raise ValueError(f"Wrong arguments: {node.args}")
407 def repl_f(match):
408 specifier = match.group()
409 return specifiers_map.get(specifier, specifier)
411 # adapt format string
412 node.args[1].value = re.sub(r"%[a-zA-Z]", repl_f, node.args[1].value)
414 # add type casting
415 node.args[0] = cast(node.args[0], "timestamp")
418def from_unixtime_fn(node):
419 """
420 Adapt to make_timestamp function
421 FROM_UNIXTIME(1447430881) => make_timestamp((1447430881::int8 *1000000))
423 Docs:
424 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_from-unixtime
425 https://duckdb.org/docs/stable/sql/functions/timestamp#make_timestampmicroseconds
426 """
428 if len(node.args) != 1:
429 raise ValueError(f"Wrong arguments: {node.args}")
431 node.op = "make_timestamp"
433 node.args[0] = BinaryOperation("*", args=[cast(node.args[0], "int8"), Constant(1_000_000)])
436def from_days_fn(node):
437 """
438 Adapt to converting days to interval and adding to first day of the 0 year:
439 FROM_DAYS(735669) => '0000-01-01'::date + (735669 * INTERVAL '1 day')
441 Docs:
442 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_from-days
443 """
444 node.args = apply_nested_functions(node.args)
446 if len(node.args) != 1:
447 raise ValueError(f"Wrong arguments: {node.args}")
449 return BinaryOperation(
450 op="+",
451 args=[
452 BinaryOperation("::", args=[Constant("0000-01-01"), Identifier("date")]),
453 BinaryOperation("*", args=[node.args[0], Interval("1 day")]),
454 ],
455 )
458def dayofyear_fn(node):
459 """
460 Addapt to DATE_PART:
461 DAYOFYEAR('2007-02-03') => DATE_PART('doy', '2007-02-03'::date)
463 Docs:
464 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayofyear
465 """
467 return date_part(node, "doy")
470def dayofweek_fn(node):
471 """
472 Addapt to DATE_PART:
473 DAYOFWEEK('2007-02-03'); => DATE_PART('dow', '2007-02-03'::date) + 1;
475 Docs:
476 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayofweek
477 """
478 return BinaryOperation("+", args=[date_part(node, "dow"), Constant(1)])
481def dayofmonth_fn(node):
482 """
483 Addapt to DATE_PART:
484 DAYOFMONTH('2007-02-03') => DATE_PART('day', '2007-02-03'::date)
486 Docs:
487 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayofmonth
488 """
490 return date_part(node, "day")
493def dayname_fn(node):
494 """
495 Use the same function with type casting
496 DAYNAME('2007-02-03') => DAYNAME('2007-02-03'::date)
498 Docs:
499 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_dayname
500 """
501 if len(node.args) != 1:
502 raise ValueError(f"Wrong arguments: {node.args}")
504 node.args[0] = cast(node.args[0], "date")
507def curdate_fn(node):
508 """
509 Replace the name of the function
510 CURDATE() => CURRENT_DATE()
512 Docs:
513 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_curdate
514 https://duckdb.org/docs/stable/sql/functions/date.html#current_date
515 """
516 node.op = "CURRENT_DATE"
519def datediff_fn(node):
520 """
521 Change argument's order and cast to date:
522 DATEDIFF('2007-12-31 23:59:59','2007-11-30') => datediff('day',DATE '2007-11-30', DATE '2007-12-31 23:59:59')
524 Docs:
525 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_datediff
526 https://duckdb.org/docs/stable/sql/functions/date#date_diffpart-startdate-enddate
528 """
529 if len(node.args) != 2:
530 raise ValueError(f"Wrong arguments: {node.args}")
532 arg1, arg2 = node.args
533 node.args = [Constant("day"), cast(arg2, "date"), cast(arg1, "date")]
536def adddate_fn(node):
537 """
538 Replace the name of the function and add type casting
539 Important! The second parameter can be only interval (not count of days).
540 SELECT ADDDATE('2008-01-02', INTERVAL 31 DAY) => SELECT DATE_ADD('2008-01-02'::date, INTERVAL 31 DAY)
542 Docs:
543 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_adddate
544 https://duckdb.org/docs/stable/sql/functions/date.html#date_adddate-interval
545 """
546 if len(node.args) != 2:
547 raise ValueError(f"Wrong arguments: {node.args}")
549 node.op = "DATE_ADD"
550 node.args[0] = cast(node.args[0], "timestamp")
553def date_sub_fn(node):
554 """
555 Use DATE_ADD with negative interval
556 SELECT DATE_SUB('1998-01-02', INTERVAL 31 DAY) => select DATE_ADD('1998-01-02'::date, -INTERVAL 31 DAY)
558 Docs:
559 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_date-add
560 https://duckdb.org/docs/stable/sql/functions/date.html#date_adddate-interval
561 """
562 if len(node.args) != 2:
563 raise ValueError(f"Wrong arguments: {node.args}")
565 node.op = "DATE_ADD"
566 node.args[0] = cast(node.args[0], "timestamp")
567 node.args[1] = UnaryOperation("-", args=[node.args[1]])
570def addtime_fn(node):
571 """
572 Convert second parameter into interval.
573 Important!
574 - The second parameter can be only a constant.
575 - The first parameter can be only date/datetime (not just time)
577 ADDTIME('2007-12-31', '1 1:1:1.2')
578 =>
579 DATE_ADD('2007-12-31'::timestamp, INTERVAL '1 day 1 hour 1 minute 1.2 second')
581 Docs:
582 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_addtime
583 https://duckdb.org/docs/stable/sql/functions/date.html#date_adddate-interval
584 """
585 node.args = apply_nested_functions(node.args)
587 if len(node.args) != 2:
588 raise ValueError(f"Wrong arguments: {node.args}")
590 interval = node.args[1]
591 if not isinstance(interval, Constant) or not isinstance(interval.value, str):
592 raise ValueError(f"The second argument have to be string: {node.args[1]}")
594 pattern = r"^(?:(\d+)\s+)?(?:(\d+):)?(?:(\d+):)?(\d+)(?:\.(\d+))?$"
596 match = re.match(pattern, interval.value)
597 if not match:
598 raise ValueError(f"Invalid MySQL time format: {interval.value}")
600 # Extract components
601 days, hours, minutes, seconds, fractional = match.groups()
602 # Build interval string
603 parts = []
604 if days and int(days) > 0:
605 parts.append(f"{days} day")
607 if hours and int(hours) > 0:
608 parts.append(f"{int(hours)} hour")
610 if minutes and int(minutes) > 0:
611 parts.append(f"{int(minutes)} minute")
613 seconds = int(seconds) if seconds else 0
614 fractional = float(f"0.{fractional}") if fractional else 0.0
615 total_seconds = seconds + fractional
616 if total_seconds > 0:
617 seconds_str = str(total_seconds).rstrip("0").rstrip(".")
618 parts.append(f"{seconds_str} second")
620 # If all components are zero, return 0 seconds
621 if not parts:
622 interval_str = "0 second"
623 else:
624 interval_str = " ".join(parts)
626 return Function(
627 "DATE_ADD",
628 args=[
629 cast(node.args[0], "timestamp"),
630 Interval(interval_str),
631 ],
632 )
635def convert_tz_fn(node):
636 """
637 Concatenate timezone to first argument and cast it as timestamptz. Then use `timezone` function
638 Important! Duckdb doesn't recognize timezones in digital formats: +10:00
640 CONVERT_TZ('2004-01-01 12:00:00','GMT','MET')
641 =>
642 timezone('MET', ('2004-01-01 12:00:00' || ' ' || 'GMT')::timestamptz);
644 Docs:
645 https://dev.mysql.com/doc/refman/8.4/en/date-and-time-functions.html#function_convert-tz
646 https://duckdb.org/docs/stable/sql/functions/timestamptz.html#timezonetext-timestamp
647 """
648 node.args = apply_nested_functions(node.args)
650 if len(node.args) != 3:
651 raise ValueError(f"Wrong arguments: {node.args}")
653 date, tzfrom, tzto = node.args
655 # concatenate tz name: date || ' ' || tzfrom
656 tzdate = BinaryOperation("||", args=[BinaryOperation("||", args=[date, Constant(" ")]), tzfrom], parentheses=True)
658 return Function(
659 "timezone",
660 args=[
661 tzto,
662 cast(tzdate, "timestamptz"),
663 ],
664 )
667def apply_nested_functions(args):
668 args2 = []
669 for arg in args:
670 if isinstance(arg, Function):
671 fnc = mysql_to_duckdb_fnc(arg)
672 if args2 is not None:
673 arg = fnc(arg)
674 args2.append(arg)
675 return args2
678def mysql_to_duckdb_fnc(node):
679 fnc_name = node.op.lower()
681 mysql_to_duck_fn_map = {
682 "char": char_fn,
683 "locate": locate_fn,
684 "insrt": locate_fn,
685 "unhex": unhex_fn,
686 "format": format_fn,
687 "sha2": sha2_fn,
688 "length": length_fn,
689 "regexp_substr": regexp_substr_fn,
690 "substring_index": substring_index_fn,
691 "curtime": curtime_fn,
692 "timestampdiff": timestampdiff_fn,
693 "extract": extract_fn,
694 "get_format": get_format_fn,
695 "date_format": date_format_fn,
696 "from_unixtime": from_unixtime_fn,
697 "from_days": from_days_fn,
698 "dayofyear": dayofyear_fn,
699 "dayofweek": dayofweek_fn,
700 "day": dayofmonth_fn,
701 "dayofmonth": dayofmonth_fn,
702 "dayname": dayname_fn,
703 "curdate": curdate_fn,
704 "datediff": datediff_fn,
705 "adddate": adddate_fn,
706 "date_sub": date_sub_fn,
707 "date_add": adddate_fn,
708 "addtime": addtime_fn,
709 "convert_tz": convert_tz_fn,
710 }
711 if fnc_name in mysql_to_duck_fn_map:
712 return mysql_to_duck_fn_map[fnc_name]