CREATE VIEW tem_wafw00f_original_txt AS
WITH RECURSIVE
-- Recursively split content into trimmed lines with a sequential line number
split_lines(uniform_resource_id, uri, line, rest, ln) AS (
SELECT
uniform_resource_id,
uri,
trim(
CASE
WHEN instr(content, char(10)) > 0 THEN substr(content, 1, instr(content, char(10)) - 1)
ELSE content
END
) AS line,
CASE
WHEN instr(content, char(10)) > 0 THEN substr(content, instr(content, char(10)) + 1)
ELSE ''
END AS rest,
1 AS ln
FROM uniform_resource
WHERE uri LIKE '%wafw00f%'
UNION ALL
SELECT
uniform_resource_id,
uri,
trim(
CASE
WHEN instr(rest, char(10)) > 0 THEN substr(rest, 1, instr(rest, char(10)) - 1)
ELSE rest
END
) AS line,
CASE
WHEN instr(rest, char(10)) > 0 THEN substr(rest, instr(rest, char(10)) + 1)
ELSE ''
END AS rest,
ln + 1
FROM split_lines
WHERE rest <> ''
),
-- Find the start line number for each block (lines that begin with "[*] Checking ")
block_start AS (
SELECT uniform_resource_id, uri, ln AS start_ln
FROM split_lines
WHERE line LIKE '[*] Checking %'
),
-- Assign each marker line to the most recent block_start before it.
-- Also filter to include only marker lines that start with one of the allowed prefixes.
block_lines AS (
SELECT s.uniform_resource_id,
s.uri,
b.start_ln AS block_ln,
s.line
FROM split_lines s
JOIN block_start b
ON s.uniform_resource_id = b.uniform_resource_id
AND s.ln >= b.start_ln
AND NOT EXISTS (
SELECT 1 FROM block_start b2
WHERE b2.uniform_resource_id = s.uniform_resource_id
AND b2.start_ln > b.start_ln
AND b2.start_ln <= s.ln
)
WHERE
-- keep only marker lines:
s.line LIKE '[*]%' OR
s.line LIKE '[+]%' OR
s.line LIKE '[-]%' OR
s.line LIKE '[~]%'
)
-- Aggregate lines into one block per row; order by the block start line
SELECT
uniform_resource_id,
uri,
group_concat(line, char(10)) AS block_content
FROM block_lines
GROUP BY uniform_resource_id, uri, block_ln
ORDER BY uniform_resource_id, block_ln