forked from dylan-sutton-chavez/edge-python
-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathseeds.sh
More file actions
84 lines (80 loc) · 1.45 KB
/
seeds.sh
File metadata and controls
84 lines (80 loc) · 1.45 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
#!/usr/bin/env bash
# Regenerates the fuzz inputs from the single source of truth
# (tests/cases/vm.json). `in/` (seed corpus) and `edge.dict` are gitignored
# artifacts; run this once before fuzzing. Needs bash plus grep with -P
# support and sha1sum; no other runtime.
set -euo pipefail
cd "$(dirname "$0")"

# Seed corpus: one file per unique `src` in the VM test fixtures.
fixture=../tests/cases/vm.json

# Pull the body of every JSON string that follows a "src" key. The pattern
# steps over backslash pairs (\" and \\) so an escaped quote does not end the
# match. This runs BEFORE the old corpus is removed and its status is checked
# explicitly: inside a process substitution a failing grep (missing fixture,
# no -P support, no matches) would go unnoticed and leave an empty `in/`.
if ! raw_srcs=$(grep -oP '"src":\s*"\K(?:[^"\\]|\\.)*' "$fixture"); then
  printf 'seeds: no "src" entries could be read from %s\n' "$fixture" >&2
  exit 1
fi

rm -rf in && mkdir -p in

# Replacement values for the unescape step, kept in variables so the
# substitutions below need no nested quoting.
newline=$'\n'
tab=$'\t'
cr=$'\r'
dquote='"'
backslash='\'
marker=$'\x01' # stand-in for an escaped backslash while other escapes are decoded

while IFS= read -r raw; do
  # Unescape with parameter expansion rather than `$(... | sed ...)`: command
  # substitution strips trailing newlines, which would truncate every source
  # ending in `\n`, and this avoids one sed fork per fixture entry.
  # `\\` is parked on the marker first so that e.g. `\\n` decodes to a
  # backslash followed by `n`, not to a newline. Only \\ \n \t \r \" are
  # decoded; any other JSON escape (\uXXXX, \/, \b, \f) is left as written.
  src=${raw//'\\'/$marker}
  src=${src//'\n'/$newline}
  src=${src//'\t'/$tab}
  src=${src//'\r'/$cr}
  src=${src//'\"'/$dquote}
  src=${src//"$marker"/"$backslash"}
  [[ -n "$src" ]] || continue
  # Content-addressed file name: identical sources map to the same file, which
  # is what deduplicates the corpus.
  name=$(printf '%s' "$src" | sha1sum | cut -c1-16)
  printf '%s' "$src" > "in/$name"
done <<<"$raw_srcs"

# Count the seeds with a glob instead of parsing `ls` output.
shopt -s nullglob
seed_files=(in/*)
shopt -u nullglob
echo "seeds: ${#seed_files[@]}"
# Token dictionary for the AFL++ mutator: keywords, operators/punctuation, and
# builtins/common identifiers, one quoted token per line. The heredoc
# delimiter is quoted, so the shell writes every entry verbatim with no
# expansion.
dict_file=edge.dict
cat <<'TOKENS' > "$dict_file"
# keywords
"if"
"else"
"elif"
"for"
"while"
"def"
"class"
"return"
"import"
"from"
"try"
"except"
"with"
"yield"
"async"
"await"
"pass"
"break"
"continue"
"True"
"False"
"None"
"and"
"or"
"not"
"in"
"is"
"lambda"
"assert"
"del"
"raise"
# operators and punctuation
"->"
":="
"=="
"!="
"<="
">="
"**"
"//"
"<<"
">>"
"+="
"-="
"*="
"/="
"..."
# builtins and common identifiers
"print("
"len("
"range("
"int("
"str("
"list("
"dict("
"set("
"input("
"self"
"__init__"
"f\"{"
TOKENS
# Report how many entries were written; only lines that open with a double
# quote are tokens, the `#` lines are section comments.
printf 'dict: %s entries\n' "$(grep -c '^"' "$dict_file")"