Skip to content

Commit 65c4fa2

Browse files
committed
Fixed bug where "Hänggi-P" did not encode as "Hä/ng/gi/-P/"
1 parent a703cc6 commit 65c4fa2

File tree

3 files changed

+94
-52
lines changed

3 files changed

+94
-52
lines changed

TODO.md

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,3 +8,8 @@
88
## Someday, Maybe
99

1010
+ [ ] Look at specific recommendations for the ppath and object mapping to support OCFL, integrate with pairpath
11+
12+
## Completed
13+
14+
+ [x] "Hänggi-P" should encode as "Hä/ng/gi/-P/"; was getting a bad Unicode split on the "a" with umlaut (ä)
15+

pairtree.go

Lines changed: 70 additions & 51 deletions
Original file line numberDiff line numberDiff line change
@@ -24,93 +24,112 @@ import (
2424
"strings"
2525
)
2626

27+
const (
28+
Version = `v0.0.2`
29+
)
30+
2731
var (
28-
stepOneEncoding = map[string]string{
29-
" ": "^20",
30-
"\"": "^22",
31-
"<": "^3c",
32-
"\\": "^5c",
33-
"*": "^2a",
34-
"=": "^3d",
35-
"^": "^5e",
36-
"+": "^2b",
37-
">": "^3e",
38-
"|": "^7c",
39-
",": "^2c",
40-
"?": "^3f",
32+
stepOneEncoding = map[rune][]rune{
33+
' ': []rune("^20"),
34+
'"': []rune("^22"),
35+
'<': []rune("^3c"),
36+
'\\': []rune("^5c"),
37+
'*': []rune("^2a"),
38+
'=': []rune("^3d"),
39+
'^': []rune("^5e"),
40+
'+': []rune("^2b"),
41+
'>': []rune("^3e"),
42+
'|': []rune("^7c"),
43+
',': []rune("^2c"),
44+
'?': []rune("^3f"),
4145
}
42-
stepTwoEncoding = map[string]string{
43-
"/": "=",
44-
":": "+",
45-
".": ",",
46+
stepTwoEncoding = map[rune]rune{
47+
'/': '=',
48+
':': '+',
49+
'.': ',',
4650
}
4751
)
4852

49-
func charEncode(s string) string {
50-
//NOTE: we need to replace ^ with ^5e and avoid collisions with other hex values
51-
// we split the string into an array of substrings, then replace each one as needed.
52-
p := strings.Split(s, "")
53-
for i, target := range p {
54-
if val, ok := stepOneEncoding[target]; ok == true {
55-
p[i] = val
53+
func charEncode(src []rune) []rune {
54+
// NOTE: We run through stepOneEncoding map first, then stepTwoEncoding...
55+
results := []rune{}
56+
for i := 0; i < len(src); i++ {
57+
if val, ok := stepOneEncoding[src[i]]; ok == true {
58+
results = append(results, val...)
59+
} else {
60+
results = append(results, src[i])
5661
}
5762
}
58-
s = strings.Join(p, "")
59-
for target, replacement := range stepTwoEncoding {
60-
if strings.Contains(s, target) {
61-
s = strings.Replace(s, target, replacement, -1)
63+
for i := 0; i < len(results); i++ {
64+
key := results[i]
65+
if val, ok := stepTwoEncoding[key]; ok == true {
66+
results[i] = val
6267
}
6368
}
64-
return s
69+
return results
6570
}
6671

6772
func charDecode(s string) string {
6873
for replacement, target := range stepTwoEncoding {
69-
if strings.Contains(s, target) {
70-
s = strings.Replace(s, target, replacement, -1)
74+
t := string(target)
75+
r := string(replacement)
76+
if strings.Contains(s, t) {
77+
s = strings.Replace(s, t, r, -1)
7178
}
7279
}
7380
for replacement, target := range stepOneEncoding {
74-
if strings.Contains(s, target) {
75-
s = strings.Replace(s, target, replacement, -1)
81+
t := string(target)
82+
r := string(replacement)
83+
if strings.Contains(s, t) {
84+
s = strings.Replace(s, t, r, -1)
7685
}
7786
}
7887
return s
7988
}
8089

8190
// Encode takes a string and encodes it as a pairtree path.
8291
func Encode(src string) string {
83-
s := charEncode(src)
84-
results := []string{}
92+
//s := charEncode(src)
93+
//s := []rune(src)
94+
s := charEncode([]rune(src))
95+
results := []rune{}
8596
for i := 0; i < len(s); i += 2 {
97+
if len(results) > 0 {
98+
results = append(results, os.PathSeparator)
99+
}
86100
if (i + 2) < len(s) {
101+
//FIXME need to char encode here...
87102
t := s[i : i+2]
88-
results = append(results, t)
103+
results = append(results, t...)
89104
} else {
105+
//FIXME need to char encode here...
90106
t := s[i:]
91-
results = append(results, t)
107+
results = append(results, t...)
92108
}
93109
}
94-
results = append(results, "")
95-
return strings.Join(results, string(os.PathSeparator))
110+
if len(results) > 0 {
111+
return string(results) + "/"
112+
}
113+
return string(results)
96114
}
97115

98116
// Decode takes a pairtree path and returns the original string representation
99117
func Decode(src string) string {
100-
parts := strings.Split(src, string(os.PathSeparator))
118+
s := []rune(src)
101119
results := []string{}
102-
for _, segment := range parts {
103-
if segment == "obj" {
104-
break
105-
}
106-
if len(segment) > 2 {
107-
break
108-
}
109-
if len(segment) == 1 {
110-
results = append(results, segment)
111-
break
120+
prev, cur := 0, 0
121+
for ; cur < len(s); cur++ {
122+
if s[cur] == os.PathSeparator {
123+
switch cur - prev {
124+
case 2:
125+
results = append(results, string(s[prev:cur]))
126+
prev = cur + 1
127+
case 1:
128+
results = append(results, string(s[prev:cur]))
129+
default:
130+
break
131+
}
112132
}
113-
results = append(results, segment)
114133
}
115134
return charDecode(strings.Join(results, ""))
116135
}

pairtree_test.go

Lines changed: 19 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,7 @@ func TestCharEncoding(t *testing.T) {
3131
}
3232

3333
for src, expected := range testCharEncoding {
34-
result := charEncode(src)
34+
result := string(charEncode([]rune(src)))
3535
if result != expected {
3636
t.Errorf("%q, expected %q, got %q", src, expected, result)
3737
}
@@ -108,3 +108,21 @@ func TestAdvanced(t *testing.T) {
108108
}
109109
}
110110
}
111+
112+
func TestUTF8Names(t *testing.T) {
113+
testData := map[string]string{
114+
"Hänggi-P": "Hä/ng/gi/-P/",
115+
}
116+
for src, expected := range testData {
117+
result := Encode(src)
118+
if result != expected {
119+
t.Errorf("encode %q, expected %q, got %q", src, expected, result)
120+
}
121+
}
122+
for expected, src := range testData {
123+
result := Decode(src)
124+
if result != expected {
125+
t.Errorf("decode %q, expected %q, got %q", src, expected, result)
126+
}
127+
}
128+
}

0 commit comments

Comments
 (0)