1 /++
2 $(H2 Sparse Tensors)
3 
4 License:   $(WEB www.boost.org/LICENSE_1_0.txt, Boost License 1.0).
5 
6 Authors:   Ilya Yaroshenko
7 +/
8 module mir.sparse;
9 
10 import std.traits;
11 import std.meta;
12 
13 import mir.ndslice.slice;
14 public import mir.ndslice.field: SparseField;
15 public import mir.ndslice.iterator: ChopIterator, FieldIterator;
16 public import mir.series: isSeries, Series, mir_series, series;
17 public import mir.ndslice.slice: CoordinateValue, Slice, mir_slice;
18 public import mir.ndslice.topology: chopped;
19 
20 
21 //TODO: replace with `static foreach`
22 private template Iota(size_t i, size_t j)
23 {
24     static assert(i <= j, "Iota: i should be less than or equal to j");
25     static if (i == j)
26         alias Iota = AliasSeq!();
27     else
28         alias Iota = AliasSeq!(i, Iota!(i + 1, j));
29 }
30 
31 /++
32 Sparse tensors represented in Dictionary of Keys (DOK) format.
33 
34 Params:
35     N = dimension count
36     lengths = list of dimension lengths
37 Returns:
38     `N`-dimensional slice composed of indeces
39 See_also: $(LREF Sparse)
40 +/
41 Sparse!(T, N) sparse(T, size_t N)(size_t[N] lengths...)
42 {
43     T[size_t] table;
44     table[0] = 0;
45     table.remove(0);
46     assert(table !is null);
47     with (typeof(return)) return FieldIterator!(SparseField!T)(0, SparseField!T(table)).sliced(lengths);
48 }
49 
50 ///
51 pure unittest
52 {
53     auto slice = sparse!double(2, 3);
54     slice[0][] = 1;
55     slice[0, 1] = 2;
56     --slice[0, 0];
57     slice[1, 2] += 4;
58 
59     assert(slice == [[0, 2, 1], [0, 0, 4]]);
60 
61     import std.range.primitives: isRandomAccessRange;
62     static assert(isRandomAccessRange!(Sparse!(double, 2)));
63 
64     import mir.ndslice.slice: Slice, DeepElementType;
65     static assert(is(Sparse!(double, 2) : Slice!(FieldIterator!(SparseField!double), 2)));
66     static assert(is(DeepElementType!(Sparse!(double, 2)) == double));
67 }
68 
69 /++
70 Returns unsorted forward range of (coordinate, value) pairs.
71 
72 Params:
73     slice = sparse slice with pure structure. Any operations on structure of a slice are not allowed.
74 +/
75 auto byCoordinateValue(size_t N, T)(Slice!(FieldIterator!(SparseField!T), N) slice)
76 {
77     struct ByCoordinateValue
78     {
79         private sizediff_t[N-1] _strides;
80         mixin _sparse_range_methods!(typeof(slice._iterator._field._table.byKeyValue()));
81 
82         auto front() @property
83         {S:
84             assert(!_range.empty);
85             auto iv = _range.front;
86             size_t index = iv.key;
87             if (!(_l <= index && index < _r))
88             {
89                 _range.popFront;
90                 goto S;
91             }
92             CoordinateValue!(T, N) ret;
93             foreach (i; Iota!(0, N - 1))
94             {
95                 ret.index[i] = index / _strides[i];
96                 index %= _strides[i];
97             }
98             ret.index[N - 1] = index;
99             ret.value = iv.value;
100             return ret;
101         }
102     }
103     size_t l = slice._iterator._index;
104     size_t r = l + slice.elementCount;
105     size_t length = slice._iterator._field._table.byKey.countInInterval(l, r);
106     return ByCoordinateValue(slice.strides[0..N-1], length, l, r, slice._iterator._field._table.byKeyValue);
107 }
108 
109 ///
110 pure unittest
111 {
112     import mir.array.allocation: array;
113     import mir.ndslice.sorting: sort;
114     alias CV = CoordinateValue!(double, 2);
115 
116     auto slice = sparse!double(3, 3);
117     slice[] = [[0, 2, 1], [0, 0, 4], [6, 7, 0]];
118     assert(slice.byCoordinateValue.array.sort() == [
119         CV([0, 1], 2),
120         CV([0, 2], 1),
121         CV([1, 2], 4),
122         CV([2, 0], 6),
123         CV([2, 1], 7)]);
124 }
125 
126 /++
127 Returns unsorted forward range of coordinates.
128 Params:
129     slice = sparse slice with pure structure. Any operations on structure of a slice are not allowed.
130 +/
131 auto byCoordinate(T, size_t N)(Slice!(FieldIterator!(SparseField!T), N) slice)
132 {
133     struct ByCoordinate
134     {
135         private sizediff_t[N-1] _strides;
136         mixin _sparse_range_methods!(typeof(slice._iterator._field._table.byKey()));
137 
138         auto front() @property
139         {S:
140             assert(!_range.empty);
141             size_t index = _range.front;
142             if (!(_l <= index && index < _r))
143             {
144                 _range.popFront;
145                 goto S;
146             }
147             size_t[N] ret;
148             foreach (i; Iota!(0, N - 1))
149             {
150                 ret[i] = index / _strides[i];
151                 index %= _strides[i];
152             }
153             ret[N - 1] = index;
154             return ret;
155         }
156     }
157     size_t l = slice._iterator._index;
158     size_t r = l + slice.elementCount;
159     size_t length = slice._iterator._field._table.byKey.countInInterval(l, r);
160     return ByCoordinate(slice.strides[0 .. N - 1], length, l, r, slice._iterator._field._table.byKey);
161 }
162 
163 ///
164 pure unittest
165 {
166     import mir.array.allocation: array;
167     import mir.ndslice.sorting: sort;
168 
169     auto slice = sparse!double(3, 3);
170     slice[] = [[0, 2, 1], [0, 0, 4], [6, 7, 0]];
171     assert(slice.byCoordinate.array.sort() == [
172         [0, 1],
173         [0, 2],
174         [1, 2],
175         [2, 0],
176         [2, 1]]);
177 }
178 
179 /++
180 Returns unsorted forward range of values.
181 Params:
182     slice = sparse slice with pure structure. Any operations on structure of a slice are not allowed.
183 +/
184 auto onlyByValue(T, size_t N)(Slice!(FieldIterator!(SparseField!T), N) slice)
185 {
186     struct ByValue
187     {
188         mixin _sparse_range_methods!(typeof(slice._iterator._field._table.byKeyValue()));
189 
190         auto front() @property
191         {S:
192             assert(!_range.empty);
193             auto iv = _range.front;
194             size_t index = iv.key;
195             if (!(_l <= index && index < _r))
196             {
197                 _range.popFront;
198                 goto S;
199             }
200             return iv.value;
201         }
202     }
203     size_t l = slice._iterator._index;
204     size_t r = l + slice.elementCount;
205     size_t length = slice._iterator._field._table.byKey.countInInterval(l, r);
206     return ByValue(length, l, r, slice._iterator._field._table.byKeyValue);
207 }
208 
209 ///
210 pure unittest
211 {
212     import mir.array.allocation: array;
213     import mir.ndslice.sorting: sort;
214 
215     auto slice = sparse!double(3, 3);
216     slice[] = [[0, 2, 1], [0, 0, 4], [6, 7, 0]];
217     assert(slice.onlyByValue.array.sort() == [1, 2, 4, 6, 7]);
218 }
219 
220 pragma(inline, false)
221 private size_t countInInterval(Range)(Range range, size_t l, size_t r)
222 {
223     size_t count;
224     foreach(ref i; range)
225         if (l <= i && i < r)
226             count++;
227     return count;
228 }
229 
230 private mixin template _sparse_range_methods(Range)
231 {
232     private size_t _length, _l, _r;
233     private Range _range;
234 
235     void popFront()
236     {
237         assert(!_range.empty);
238         _range.popFront;
239         _length--;
240     }
241 
242     bool empty() const @property
243     {
244         return _length == 0;
245     }
246 
247     auto save() @property
248     {
249         auto ret = this;
250         ret._range = ret._range.save;
251         return ret;
252     }
253 
254     size_t length() const @property
255     {
256         return _length;
257     }
258 }
259 
260 /++
261 Returns compressed tensor.
262 Note: allocates using GC.
263 +/
264 auto compress(I = uint, J = size_t, SliceKind kind, size_t N, Iterator)(Slice!(Iterator, N, kind) slice)
265     if (N > 1)
266 {
267     return compressWithType!(DeepElementType!(Slice!(Iterator, N, kind)), I, J)(slice);
268 }
269 
270 /// Sparse tensor compression
271 unittest
272 {
273     auto sparse = sparse!double(5, 3);
274     sparse[] =
275         [[0, 2, 1],
276          [0, 0, 4],
277          [0, 0, 0],
278          [6, 0, 9],
279          [0, 0, 5]];
280 
281     auto crs = sparse.compressWithType!double;
282     // assert(crs.iterator._field == CompressedField!(double, uint, uint)(
283     //      3,
284     //     [2, 1, 4, 6, 9, 5],
285     //     [1, 2, 2, 0, 2, 2],
286     //     [0, 2, 3, 3, 5, 6]));
287 }
288 
289 /// Sparse tensor compression
290 unittest
291 {
292     auto sparse = sparse!double(5, 8);
293     sparse[] =
294         [[0, 2, 0, 0, 0, 0, 0, 1],
295          [0, 0, 0, 0, 0, 0, 0, 4],
296          [0, 0, 0, 0, 0, 0, 0, 0],
297          [6, 0, 0, 0, 0, 0, 0, 9],
298          [0, 0, 0, 0, 0, 0, 0, 5]];
299 
300     auto crs = sparse.compressWithType!double;
301     // assert(crs.iterator._field == CompressedField!(double, uint, uint)(
302     //      8,
303     //     [2, 1, 4, 6, 9, 5],
304     //     [1, 7, 7, 0, 7, 7],
305     //     [0, 2, 3, 3, 5, 6]));
306 }
307 
308 /// Dense tensor compression
309 unittest
310 {
311     import mir.ndslice.allocation: slice;
312 
313     auto sl = slice!double(5, 3);
314     sl[] =
315         [[0, 2, 1],
316          [0, 0, 4],
317          [0, 0, 0],
318          [6, 0, 9],
319          [0, 0, 5]];
320 
321     auto crs = sl.compressWithType!double;
322 
323     // assert(crs.iterator._field == CompressedField!(double, uint, uint)(
324     //      3,
325     //     [2, 1, 4, 6, 9, 5],
326     //     [1, 2, 2, 0, 2, 2],
327     //     [0, 2, 3, 3, 5, 6]));
328 }
329 
330 /// Dense tensor compression
331 unittest
332 {
333     import mir.ndslice.allocation: slice;
334 
335     auto sl = slice!double(5, 8);
336     sl[] =
337         [[0, 2, 0, 0, 0, 0, 0, 1],
338          [0, 0, 0, 0, 0, 0, 0, 4],
339          [0, 0, 0, 0, 0, 0, 0, 0],
340          [6, 0, 0, 0, 0, 0, 0, 9],
341          [0, 0, 0, 0, 0, 0, 0, 5]];
342 
343     auto crs = sl.compress;
344     // assert(crs.iterator._field == CompressedField!(double, uint, uint)(
345     //      8,
346     //     [2, 1, 4, 6, 9, 5],
347     //     [1, 7, 7, 0, 7, 7],
348     //     [0, 2, 3, 3, 5, 6]));
349 }
350 
351 /++
352 Returns compressed tensor with different element type.
353 Note: allocates using GC.
354 +/
355 Slice!(ChopIterator!(J*, Series!(I*, V*)), N - 1)
356     compressWithType(V, I = uint, J = size_t, T, size_t N)
357     (Slice!(FieldIterator!(SparseField!T), N) slice)
358     if (is(T : V) && N > 1 && isUnsigned!I)
359 {
360     import mir.array.allocation: array;
361     import mir.ndslice.sorting: sort;
362     import mir.ndslice.topology: iota;
363     auto compressedData = slice
364         .iterator
365         ._field
366         ._table
367         .series!(size_t, T, I, V);
368     auto pointers = new J[slice.shape[0 .. N - 1].iota.elementCount + 1];
369     size_t k = 1, shift;
370     pointers[0] = 0;
371     pointers[1] = 0;
372     const rowLength = slice.length!(N - 1);
373     if(rowLength) foreach (ref index; compressedData.index.field)
374     {
375         for(;;)
376         {
377             sizediff_t newIndex = index - shift;
378             if (newIndex >= rowLength)
379             {
380                 pointers[k + 1] = pointers[k];
381                 shift += rowLength;
382                 k++;
383                 continue;
384             }
385             index = cast(I)newIndex;
386             pointers[k] = cast(J) (pointers[k] + 1);
387             break;
388         }
389 
390     }
391     pointers[k + 1 .. $] = pointers[k];
392     return compressedData.chopped(pointers);
393 }
394 
395 
396 /// ditto
397 Slice!(ChopIterator!(J*, Series!(I*, V*)), N - 1)
398     compressWithType(V, I = uint, J = size_t, Iterator, size_t N, SliceKind kind)
399     (Slice!(Iterator, N, kind) slice)
400     if (!is(Iterator : FieldIterator!(SparseField!ST), ST) && is(DeepElementType!(Slice!(Iterator, N, kind)) : V) && N > 1 && isUnsigned!I)
401 {
402     import std.array: appender;
403     import mir.ndslice.topology: pack, flattened;
404     auto vapp = appender!(V[]);
405     auto iapp = appender!(I[]);
406     auto psl = slice.pack!1;
407     auto count = psl.elementCount;
408     auto pointers = new J[count + 1];
409 
410     pointers[0] = 0;
411     auto elems = psl.flattened;
412     size_t j = 0;
413     foreach (ref pointer; pointers[1 .. $])
414     {
415         auto row = elems.front;
416         elems.popFront;
417         size_t i;
418         foreach (e; row)
419         {
420             if (e)
421             {
422                 vapp.put(e);
423                 iapp.put(cast(I)i);
424                 j++;
425             }
426             i++;
427         }
428         pointer = cast(J)j;
429     }
430     return iapp.data.series(vapp.data).chopped(pointers);
431 }
432 
433 
434 /++
435 Re-compresses a compressed tensor. Makes all values, indeces and pointers consequent in memory.
436 
437 Sparse slice is iterated twice. The first tine it is iterated to get length of each sparse row, the second time - to copy the data.
438 
439 Note: allocates using GC.
440 +/
441 Slice!(ChopIterator!(J*, Series!(I*, V*)), N)
442     recompress
443     (V, I = uint, J = size_t, Iterator, size_t N, SliceKind kind)
444     (Slice!(Iterator, N, kind) sparseSlice)
445     if (isSeries!(DeepElementType!(Slice!(Iterator, N, kind))))
446 {
447     import mir.algorithm.iteration: each;
448     import mir.conv: to;
449     import mir.ndslice.allocation: uninitSlice;
450     import mir.ndslice.topology: as, member, zip;
451     import mir.ndslice. topology: pack, flattened;
452     import std.backdoor: emplaceRef;
453     
454     size_t count = sparseSlice.elementCount;
455     size_t length;
456     auto pointers = uninitSlice!J(count + 1);
457     pointers.front = 0;
458     sparseSlice
459         .member!"data"
460         .member!"elementCount"
461         .each!((len, ref ptr) {ptr = length += len;})(pointers[1 .. $]);
462 
463     auto i = uninitSlice!I(length);
464     auto v = uninitSlice!V(length);
465 
466     auto ret = i.series(v).chopped(pointers);
467 
468     sparseSlice
469         .each!((a, b) {
470             b.index[] = a.index.as!I;
471             b.value.each!(emplaceRef!V)(a.value.as!V);
472         })(ret);
473 
474     return ret;
475 }
476 
477 ///
478 unittest
479 {
480     import mir.ndslice.topology: universal;
481     import mir.ndslice.allocation: slice;
482 
483     auto sl = slice!double(5, 8);
484     sl[] =
485         [[0, 2, 0, 0, 0, 0, 0, 1],
486          [0, 0, 0, 0, 0, 0, 0, 4],
487          [0, 0, 0, 0, 0, 0, 0, 0],
488          [6, 0, 0, 0, 0, 0, 0, 9],
489          [0, 0, 0, 0, 0, 0, 0, 5]];
490 
491     auto crs = sl.compress;
492     // assert(crs.iterator._field == CompressedField!(double, uint, uint)(
493     //      8,
494     //     [2, 1, 4, 6, 9, 5],
495     //     [1, 7, 7, 0, 7, 7],
496     //     [0, 2, 3, 3, 5, 6]));
497 
498     import mir.ndslice.dynamic: reversed;
499     auto rec = crs.reversed.recompress!real;
500     auto rev = sl.universal.reversed.compressWithType!real;
501     assert(rev.structure == rec.structure);
502     // assert(rev.iterator._field.values   == rec.iterator._field.values);
503     // assert(rev.iterator._field.indeces  == rec.iterator._field.indeces);
504     // assert(rev.iterator._field.pointers == rec.iterator._field.pointers);
505 }
506 
507 /++
508 Sparse Slice in Dictionary of Keys (DOK) format.
509 +/
510 alias Sparse(T, size_t N = 1) = Slice!(FieldIterator!(SparseField!T), N);
511 
512 ///
513 alias CompressedVector(T, I = uint) = Series!(T*, I*);
514 
515 ///
516 alias CompressedMatrix(T, I = uint) = Slice!(ChopIterator!(J*, Series!(T*, I*)));
517 
518 ///
519 alias CompressedTensor(T, size_t N, I = uint, J = size_t) = Slice!(ChopIterator!(J*, Series!(T*, I*)), N - 1);
520 
521 ///ditto
522 alias CompressedTensor(T, size_t N : 1, I = uint) = Series!(I*, T*);