SQL is not designed to perform complex string manipulation
Walking a String
We want to traverse a string to return each character as a row.
-- Oracle
WITH employee_name AS (
SELECT 'Sundar' AS FIRST_NAME FROM DUAL
)
SELECT SUBSTR(FIRST_NAME, LEVEL, 1) AS CHARACTER
FROM employee_name
CONNECT BY LEVEL <= LENGTH(FIRST_NAME);
-- Oracle
SELECT SUBSTR(FIRST_NAME, LEVEL, 1) AS CHARACTER
FROM employees
WHERE FIRST_NAME = 'Sundar'
CONNECT BY LEVEL <= LENGTH(FIRST_NAME) AND PRIOR FIRST_NAME = FIRST_NAME AND PRIOR DBMS_RANDOM.VALUE IS NOT NULL;
Embedding Quotes Within String Literals
We want to embed quote marks within string literals.
-- Oracle
select 'g''day mate' qmarks from dual union all
select 'beavers'' teeth' from dual union all
select '''' from dual
select '''' as quote from dual;
Counting the Occurrences of a Character in a String
We want to count the number of times a character or substring occurs within a given string. Consider the following string. We want to determine how many commas are in the string.
10,CLARK,MANAGER
-- Using length
select (length('10,CLARK,MANAGER')-length(replace('10,CLARK,MANAGER',',','')))/length(',') as cnt
from dual
-- Traversing through each character
WITH sample_string AS (
SELECT '10,CLARK,MANAGE,R' AS str FROM DUAL
)
select count(substr(str,LEVEL,1)) from sample_string where substr(str,LEVEL,1) = ','
connect by LEVEL < LENGTH(str) ;
Removing Unwanted Characters from a String
We want to remove specific characters from data. For example, we want to remove all zeros and vowels.
-- Oracle
select ename,
replace(translate(ename,'aaaaa','AEIOU'),'a','') as stripped1,
sal,
replace(cast(sal as char(4)),'0','') as stripped2
from emp;
-- MySQL does not support TRANSLATE function, so several calls to REPLACE needed
select ename,
replace(
replace(
replace(
replace(
replace(ename,'A',''),'E',''),'I',''),'O',''),'U','')
as stripped1,
sal,
replace(sal,0,'') stripped2
from emp;
Separating Numeric and Character Data
We have numeric data stored with character data together in one column. We want to separate the character data from the numeric data.
Sample Data
SMITH800
ALLEN1600
WARD1250
JONES2975
Sample Output
ENAME SAL
SMITH 800
ALLEN 1600
WARD 1250
JONES 2975
select
replace(translate(data,'0123456789','0000000000'),'0') ename,
to_number(replace(translate(lower(data),'abcdefghijklmnopqrstuvwxyz',rpad('z',26,'z')),'z')) sal
from (
select ename||sal data
from emp
)
Determining Whether a String Is Alphanumeric
We want to return rows from a table only when a column contains no characters other than numbers and letters.
Sample Data
-- Oracle
select data
from V
where translate(lower(data),'0123456789abcdefghijklmnopqrstuvwxyz',rpad('a',36,'a')) = rpad('a',length(data),'a')
Extracting Initials from a Name
We want to convert a full name into initials.
Input - Stewie Griffin
Output - S.G.
Step 1: S##### G######
Step 2: S G
Step 3: S.G
Step 4: S.G. (append . at the end)
We want to order result set based on a substring. For example, ordered based on the last two characters of each name.
select ename
from emp
order by substr(ename,length(ename)-1,)
Ordering by a Number in a String
We want to order the result set based on a number within a string
Sample Data
-- Oracle
select data from V order by to_number(replace(translate(lower('SMITH 7369 RESEARCH'), 'abcdefghijklmnopqrstubvwxyz ', RPAD('#',27,'#')),'#',''))
Creating a Delimited List from Table Rows
We want to return table rows as values in a delimited list rather than in vertical columns as they usually appear.
Sample Data
DEPTNO EMPS
10 CLARK
10 KING
10 MILLER
20 SMITH
20 ADAMS
20 FORD
20 SCOTT
20 JONES
30 ALLEN
-- Oracle Solution
-- Create test table
CREATE TABLE test (
deptno NUMBER(2),
empname VARCHAR2(20)
);
-- Insert test data
INSERT INTO test (deptno, empname) VALUES (10, 'CLARK');
INSERT INTO test (deptno, empname) VALUES (10, 'KING');
INSERT INTO test (deptno, empname) VALUES (10, 'MILLER');
INSERT INTO test (deptno, empname) VALUES (20, 'SMITH');
INSERT INTO test (deptno, empname) VALUES (20, 'ADAMS');
INSERT INTO test (deptno, empname) VALUES (20, 'FORD');
INSERT INTO test (deptno, empname) VALUES (20, 'SCOTT');
INSERT INTO test (deptno, empname) VALUES (20, 'JONES');
INSERT INTO test (deptno, empname) VALUES (30, 'ALLEN');
INSERT INTO test (deptno, empname) VALUES (30, 'BLAKE');
INSERT INTO test (deptno, empname) VALUES (30, 'MARTIN');
INSERT INTO test (deptno, empname) VALUES (30, 'JAMES');
INSERT INTO test (deptno, empname) VALUES (30, 'TURNER');
INSERT INTO test (deptno, empname) VALUES (30, 'WARD');
-- Run the select query
SELECT
deptno,
ltrim(sys_connect_by_path(empname, ','),
',') emps
FROM
(
SELECT
deptno,
empname,
ROW_NUMBER()
OVER(PARTITION BY deptno
ORDER BY
deptno
) rn,
COUNT(*)
OVER(PARTITION BY deptno) cnt
FROM
test
)
WHERE
level = cnt
START WITH
rn = 1
CONNECT BY PRIOR deptno = deptno
AND PRIOR rn = rn - 1
The purpose of the rank (aliased RN in the query) is to allow us to walk the tree. Since the function ROW_NUMBER generates an enumeration starting from one with no duplicates or gaps, just subtract one (from the current value) to reference a prior (or parent) row. For example, the number prior to 3 is 3 minus 1, which equals 2. In this context, 2 is the parent of 3.
Output
Output of inner subquery
Converting Delimited Data into a Multivalued IN-List
We have delimited data that we want to pass to the IN-list iterator of a WHERE clause. Consider the following string: 7654,7698,7782,7788 We would like to use the string in a WHERE clause, but the following SQL fails because EMPNO is a numeric column:
select ename,sal,deptno from emp where empno in ( '7654,7698,7782,7788' )
This SQL fails because, while EMPNO is a numeric column, the IN list is composed of a single string value. We want that string to be treated as a comma-delimited list of numeric values like 7654,7698,7782,7788.
-- Oracle
select empno,ename,sal,deptno
from emp
where empno in (
select to_number(
rtrim(
substr(emps,
instr(emps,',',1,iter.pos)+1,
instr(emps,',',1,iter.pos+1)
instr(emps,',',1,iter.pos)),',')) emps
from (select ','||'7654,7698,7782,7788'||',' emps from dual) csv,
(select rownum pos from emp) iter
where iter.pos <= ((length(csv.emps)-length(replace(csv.emps,',')))/length(','))-1
)
Explanation
Step 1: Walk the string
The number of rows returned represents the number of values in the list. The strings are parsed using SUBSTR and INSTR. POS is used to locate the nth occurrence of the delimiter in each string. By enclosing the strings in commas, no special checks are necessary to determine the beginning or end of a string.
select emps,pos
from (select ','||'7654,7698,7782,7788'||',' emps
from dual) csv,
(select rownum pos from emp) iter
where iter.pos <= ((length(csv.emps)-length(replace(csv.emps,',')))/length(','))-1
select substr(emps,
instr(emps,',',1,iter.pos)+1,
instr(emps,',',1,iter.pos+1)
instr(emps,',',1,iter.pos)) emps
from (select ','||'7654,7698,7782,7788'||',' emps
from dual) csv,
(select rownum pos from emp) iter
where iter.pos <=
((length(csv.emps)-length(replace(csv.emps,',')))/length(','))-1
The final step is to remove the trailing comma from each value, cast it to a number, and plug it into a subquery.
Alphabetizing a String
We want alphabetize the individual characters within strings in your tables.
Sample Date
ENAME
ADAMS
ALLEN
BLAKE
Result
OLD_NAME NEW_NAME
ADAMS AADMS
ALLEN AELLN
BLAKE ABEKL
-- Oracle
select old_name, new_name
from (
select old_name, replace(sys_connect_by_path(c,' '),' ') new_name
from (
select e.ename old_name,
row_number() over(partition by e.ename order by substr(e.ename,iter.pos,1)) rn,
substr(e.ename,iter.pos,1) c
from emp e,
( select rownum pos from emp ) iter
where iter.pos <= length(e.ename) order by 1
) x
start with rn = 1
connect by prior rn = rn-1 and prior old_name = old_name
)
where length(old_name) = length(new_name)
Explanation
Inline view result by walking the string and then imposing order on those characters. The rest of the query attach the names back together.
The next step is to take the alphabetized characters and rebuild each name. This is done with the function SYS_CONNECT_BY_PATH by appending each character to the ones before it
The final step is to keep only the strings that have the same length as the names they were built from.
Identifying Strings That Can Be Treated as Numbers
We have a column that is defined to hold character data.
Sample Data
CL10AR
KI10NG
MI10LL
7369
7566
We want to return rows that are numbers only, or that contain at least one number. If the numbers are mixed with character data, we want to remove the characters and return only the numbers.
Output
10
10
10
7369
7566
-- Oracle
select to_number (
case
when
replace(translate(mixed,'0123456789','9999999999'),'9') is not null
then
replace(
translate(mixed,
replace(
translate(mixed,'0123456789','9999999999')
,'9'),rpad('#',length(mixed),'#')),'#')
else
mixed
end
) mixed from V
where instr(translate(mixed,'0123456789','9999999999'),'9') > 0
Explanation
Extracting the nth Delimited Substring
We want to extract a specified, delimited substring from a string.
Sample Data
create view V as
select 'mo,larry,curly' as name
from t1
union all
select 'tina,gina,jaunita,regina,leena' as name
from t1
Sample Output (We want to extract the second name in each row)
-- Oracle (Solution 1)
select sub
from (
select iter.pos,
src.name,
substr( src.name,
instr( src.name,',',1,iter.pos )+1,
instr( src.name,',',1,iter.pos+1 ) -
instr( src.name,',',1,iter.pos )-1) sub
from (select ','||name||',' as name from V) src,
(select rownum pos from emp) iter
where iter.pos < length(src.name)-length(replace(src.name,','))
)
where pos = 2
-- Oracle (Solution 2)
select data,
substr(data, INSTR(data,',',1,1) + 1, INSTR(data,',',1,2) - INSTR(data,',',1,1) - 1)
from test_data
Parsing an IP Address
We want to parse an IP address’s fields into columns
-- Oracle
select ip,
substr(ip, 1, instr(ip,'.')-1 ) a,
substr(ip, instr(ip,'.')+1, instr(ip,'.',1,2)-instr(ip,'.')-1 ) b,
substr(ip, instr(ip,'.',1,2)+1,instr(ip,'.',1,3)-instr(ip,'.',1,2)-1 ) c,
substr(ip, instr(ip,'.',1,3)+1 ) d
from (select '92.111.0.2' as ip from t1)
Comparing Strings by Sound
SQL provides a way to represent the way words sound, which allows you to find strings that sound the same even though the underlying characters aren’t identical.
For example, you have a list of authors’ names,
Sample Output
-- Oracle
select an1.a_name as name1, an2.a_name as name2,
SOUNDEX(an1.a_name) as Soundex_Name
from author_names an1
join author_names an2
on (SOUNDEX(an1.a_name)=SOUNDEX(an2.a_name)
and an1.a_name not like an2.a_name)
Finding Text Not Matching a Pattern
We have a text field that contains some structured text values (e.g., phone numbers), and want to find occurrences where those values are structured incorrectly. We want to list rows having invalidly formatted phone numbers.
Record with 7369 is invalid because its phone number uses two different separator characters.
-- Oracle
select emp_id, text
from employee_comment
where regexp_like(text, '[0-9]{3}[-. ][0-9]{3}[-. ][0-9]{4}')
and regexp_like(
regexp_replace(text,'[0-9]{3}([-. ])[0-9]{3}\1[0-9]{4}','***'), '[0-9]{3}[-. ][0-9]{3}[-. ][0-9]{4}')