ABC: A System for Sequential Synthesis and Verification
 
Loading...
Searching...
No Matches
ac_decomposition.hpp
Go to the documentation of this file.
1
24
25#ifndef _ACD_H_
26#define _ACD_H_
27#pragma once
28
29#include <algorithm>
30#include <cassert>
31#include <cstdint>
32#include <type_traits>
33#include <unordered_map>
34#include <vector>
35
36#include "kitty_constants.hpp"
38#include "kitty_dynamic_tt.hpp"
39#include "kitty_operations.hpp"
40#include "kitty_operators.hpp"
41#include "kitty_static_tt.hpp"
42
43#ifdef _MSC_VER
44# include <intrin.h>
45# define __builtin_popcount __popcnt
46#endif
47
49
50namespace acd
51{
52
55{
58
61
64
66 bool use_first{ false };
67
69 bool try_no_late_arrival{ false };
70};
71
79
85
87{
88private:
89 struct encoding_column
90 {
91 uint64_t column[2];
92 uint32_t cost;
93 uint32_t index;
94 float sort_cost;
95 };
96
97private:
98 static constexpr uint32_t max_num_vars = 11;
100
101public:
102 explicit ac_decomposition_impl( uint32_t num_vars, ac_decomposition_params const& ps, ac_decomposition_stats* pst = nullptr )
103 : num_vars( num_vars ), ps( ps ), pst( pst )
104 {
105 std::iota( permutations.begin(), permutations.end(), 0 );
106 }
107
109 int run( word* ptt, unsigned delay_profile )
110 {
111 /* truth table is too large for the settings */
112 if ( num_vars > max_num_vars )
113 {
114 return -1;
115 }
116
117 uint32_t late_arriving = __builtin_popcount( delay_profile );
118
119 /* relax maximum number of free set variables if a function has more variables */
120 if ( num_vars > ps.max_free_set_vars + ps.lut_size )
121 {
122 ps.max_free_set_vars = num_vars - ps.lut_size;
123 }
124 if ( late_arriving > ps.max_free_set_vars )
125 {
126 return -1; /* on average avoiding this computation leads to better quality */
127 // ps.max_free_set_vars = late_arriving;
128 }
129
130 /* return a high cost if too many late arriving variables */
131 if ( late_arriving > ps.lut_size - 1 )
132 {
133 return -1;
134 }
135
136 /* convert to static TT */
137 init_truth_table( ptt );
138
139 /* permute late arriving variables to be the least significant */
140 reposition_late_arriving_variables( delay_profile, late_arriving );
141
142 /* run ACD trying different bound sets and free sets */
143 if ( !find_decomposition( delay_profile, late_arriving ) )
144 {
145 return -1;
146 }
147
148 /* return number of levels */
149 return delay_profile == 0 ? 2 : 1;
150 }
151
153 {
154 if ( best_multiplicity == UINT32_MAX )
155 return -1;
156
157 /* compute isets */
158 std::vector<STT> isets = compute_isets();
159
160 generate_support_minimization_encodings();
161
162 /* solves exactly only for small multiplicities */
163 if ( best_multiplicity <= 4u )
164 solve_min_support_exact( isets );
165 else
166 solve_min_support_heuristic( isets );
167
168 /* unfeasible decomposition */
169 assert( !best_bound_sets.empty() );
170
171 return 0;
172 }
173
174 unsigned get_profile()
175 {
176 unsigned profile = 0;
177
178 if ( best_free_set > num_vars )
179 return -1;
180
181 for ( uint32_t i = 0; i < best_free_set; ++i )
182 {
183 profile |= 1 << permutations[i];
184 }
185
186 return profile;
187 }
188
189 void get_decomposition( unsigned char* decompArray )
190 {
191 if ( best_free_set > num_vars )
192 return;
193
194 generate_decomposition();
195 get_decomposition_abc( decompArray );
196 }
197
198private:
199 bool find_decomposition( unsigned& delay_profile, uint32_t late_arriving )
200 {
201 best_multiplicity = UINT32_MAX;
202 best_free_set = UINT32_MAX;
203 uint32_t best_cost = UINT32_MAX;
204 uint32_t offset = static_cast<uint32_t>( late_arriving );
205 uint32_t start = std::max( offset, 1u );
206
207 /* perform only support reducing decomposition */
208 if ( ps.support_reducing_only )
209 {
210 start = std::max( start, num_vars - ps.lut_size );
211 }
212
213 /* array of functions to compute the column multiplicity */
214 std::function<uint32_t( STT const& tt )> column_multiplicity_fn[5] = {
215 [this]( STT const& tt ) { return column_multiplicity<1u>( tt ); },
216 [this]( STT const& tt ) { return column_multiplicity<2u>( tt ); },
217 [this]( STT const& tt ) { return column_multiplicity<3u>( tt ); },
218 [this]( STT const& tt ) { return column_multiplicity5<4u>( tt ); },
219 [this]( STT const& tt ) { return column_multiplicity5<5u>( tt ); } };
220
221 /* find a feasible AC decomposition */
222 // for ( uint32_t i = std::min( ps.lut_size - 1, ps.max_free_set_vars); i >= start; --i )
223 for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i )
224 {
225 auto ret_tuple = enumerate_iset_combinations( i, offset, column_multiplicity_fn[i - 1] );
226 uint32_t multiplicity = std::get<2>( ret_tuple );
227
228 /* additional cost if not support reducing */
229 uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0;
230
231 /* check for feasible solution that improves the cost */
232 if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 )
233 {
234 best_tt = std::get<0>( ret_tuple );
235 permutations = std::get<1>( ret_tuple );
236 best_multiplicity = multiplicity;
237 best_cost = multiplicity + additional_cost;
238 best_free_set = i;
239
240 if ( !ps.use_first && multiplicity > 2 )
241 {
242 continue;
243 }
244 }
245
246 break;
247 }
248
249 if ( best_multiplicity == UINT32_MAX && ( !ps.try_no_late_arrival || late_arriving == 0 ) )
250 return false;
251
252 /* try without the delay profile */
253 if ( best_multiplicity == UINT32_MAX )
254 {
255 delay_profile = 0;
256 if ( ps.support_reducing_only )
257 {
258 start = std::max( 1u, num_vars - ps.lut_size );
259 }
260
261 for ( uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i )
262 {
263 auto ret_tuple = enumerate_iset_combinations( i, 0, column_multiplicity_fn[i - 1] );
264 uint32_t multiplicity = std::get<2>( ret_tuple );
265
266 /* additional cost if not support reducing */
267 uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0;
268
269 /* check for feasible solution that improves the cost */
270 if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 )
271 {
272 best_tt = std::get<0>( ret_tuple );
273 permutations = std::get<1>( ret_tuple );
274 best_multiplicity = multiplicity;
275 best_cost = multiplicity + additional_cost;
276 best_free_set = i;
277
278 if ( !ps.use_first && multiplicity > 2 )
279 {
280 continue;
281 }
282 }
283
284 break;
285 }
286 }
287
288 if ( best_multiplicity == UINT32_MAX )
289 return false;
290
291 /* estimation on number of LUTs */
292 if ( pst )
293 {
294 pst->num_luts = best_multiplicity <= 2 ? 2 : best_multiplicity <= 4 ? 3
295 : best_multiplicity <= 8 ? 4
296 : 5;
297 pst->num_edges = ( pst->num_luts - 1 ) * ( num_vars - best_free_set ) + ( pst->num_luts - 1 ) + best_free_set;
298 }
299
300 return true;
301 }
302
303 void init_truth_table( word* ptt )
304 {
305 uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) );
306
307 for ( uint32_t i = 0; i < num_blocks; ++i )
308 {
309 best_tt._bits[i] = ptt[i];
310 }
311
312 // local_extend_to( best_tt, num_vars );
313 }
314
315 template<uint32_t free_set_size>
316 uint32_t column_multiplicity( STT const& tt )
317 {
318 uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u };
319 uint32_t multiplicity = 0;
320 uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
321 uint64_t constexpr masks_bits[] = { 0x0, 0x3, 0xF, 0x3F };
322 uint64_t constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 };
323
324 /* supports up to 64 values of free set (256 for |FS| == 3)*/
325 static_assert( free_set_size <= 3, "Wrong free set size for method used, expected le 3" );
326
327 /* extract iset functions */
328 for ( auto i = 0u; i < num_blocks; ++i )
329 {
330 uint64_t cof = tt._bits[i];
331 for ( auto j = 0; j < ( 64 >> free_set_size ); ++j )
332 {
333 multiplicity_set[( cof >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << ( cof & masks_bits[free_set_size] );
334 cof >>= ( 1u << free_set_size );
335 }
336 }
337
338 multiplicity = __builtin_popcountl( multiplicity_set[0] );
339
340 if ( free_set_size == 3 )
341 {
342 multiplicity += __builtin_popcountl( multiplicity_set[1] );
343 multiplicity += __builtin_popcountl( multiplicity_set[2] );
344 multiplicity += __builtin_popcountl( multiplicity_set[3] );
345 }
346
347 return multiplicity;
348 }
349
350 template<uint32_t free_set_size>
351 uint32_t column_multiplicity5( STT const& tt )
352 {
353 uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
354 uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF };
355
356 static_assert( free_set_size == 5 || free_set_size == 4, "Wrong free set size for method used, expected of 4 or 5" );
357
358 uint32_t size = 0;
359 uint64_t prev = -1;
360 std::array<uint32_t, 64> multiplicity_set;
361
362 /* extract iset functions */
363 for ( auto i = 0u; i < num_blocks; ++i )
364 {
365 uint64_t cof = tt._bits[i];
366 for ( auto j = 0; j < ( 64 >> free_set_size ); ++j )
367 {
368 uint64_t fs_fn = cof & masks[free_set_size];
369 if ( fs_fn != prev )
370 {
371 multiplicity_set[size++] = static_cast<uint32_t>( fs_fn );
372 prev = fs_fn;
373 }
374 cof >>= ( 1u << free_set_size );
375 }
376 }
377
378 std::sort( multiplicity_set.begin(), multiplicity_set.begin() + size );
379
380 /* count unique */
381 uint32_t multiplicity = 1;
382 for ( auto i = 1u; i < size; ++i )
383 {
384 multiplicity += multiplicity_set[i] != multiplicity_set[i - 1] ? 1 : 0;
385 }
386
387 return multiplicity;
388 }
389
390 uint32_t column_multiplicity2( STT const& tt, uint32_t free_set_size )
391 {
392 assert( free_set_size <= 5 );
393
394 uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
395 uint64_t const shift = UINT64_C( 1 ) << free_set_size;
396 uint64_t const mask = ( UINT64_C( 1 ) << shift ) - 1;
397 uint32_t cofactors[4];
398 uint32_t size = 0;
399
400 /* extract iset functions */
401 for ( auto i = 0u; i < num_blocks; ++i )
402 {
403 uint64_t sub = tt._bits[i];
404 for ( auto j = 0; j < ( 64 >> free_set_size ); ++j )
405 {
406 uint32_t fs_fn = static_cast<uint32_t>( sub & mask );
407 uint32_t k;
408 for ( k = 0; k < size; ++k )
409 {
410 if ( fs_fn == cofactors[k] )
411 break;
412 }
413 if ( k == 2 )
414 return 3;
415 if ( k == size )
416 cofactors[size++] = fs_fn;
417 sub >>= shift;
418 }
419 }
420
421 return size;
422 }
423
424 inline bool combinations_offset_next( uint32_t k, uint32_t offset, uint32_t* pComb, uint32_t* pInvPerm, STT& tt )
425 {
426 uint32_t i;
427
428 for ( i = k - 1; pComb[i] == num_vars - k + i; --i )
429 {
430 if ( i == offset )
431 return false;
432 }
433
434 /* move vars */
435 uint32_t var_old = pComb[i];
436 uint32_t pos_new = pInvPerm[var_old + 1];
437 std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] );
438 std::swap( pComb[i], pComb[pos_new] );
439 swap_inplace_local( tt, i, pos_new );
440
441 for ( uint32_t j = i + 1; j < k; j++ )
442 {
443 var_old = pComb[j];
444 pos_new = pInvPerm[pComb[j - 1] + 1];
445 std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] );
446 std::swap( pComb[j], pComb[pos_new] );
447 swap_inplace_local( tt, j, pos_new );
448 }
449
450 return true;
451 }
452
453 template<typename Fn>
454 std::tuple<STT, std::array<uint32_t, max_num_vars>, uint32_t> enumerate_iset_combinations( uint32_t free_set_size, uint32_t offset, Fn&& fn )
455 {
456 STT tt = best_tt;
457
458 /* TT with best cost */
459 STT local_best_tt = tt;
460 uint32_t best_cost = ( 1 << ( ps.lut_size - free_set_size ) ) + 1;
461
462 assert( free_set_size >= offset );
463
464 /* special case */
465 if ( free_set_size == offset )
466 {
467 best_cost = fn( tt );
468 return std::make_tuple( tt, permutations, best_cost );
469 }
470
471 /* works up to 16 input truth tables */
472 assert( num_vars <= 16 );
473
474 /* Search for column multiplicity of 2 */
475 if ( free_set_size == ps.lut_size - 1 )
476 {
477 return enumerate_iset_combinations2( free_set_size, offset );
478 }
479
480 /* init combinations */
481 uint32_t pComb[16], pInvPerm[16], bestPerm[16];
482 for ( uint32_t i = 0; i < num_vars; ++i )
483 {
484 pComb[i] = pInvPerm[i] = i;
485 }
486
487 /* early bail-out conditions */
488 uint32_t bail_multiplicity = 2;
489 if ( best_multiplicity < UINT32_MAX )
490 {
491 bail_multiplicity = ( best_multiplicity >> 1 ) + ( best_multiplicity & 1 );
492 }
493
494 /* enumerate combinations */
495 do
496 {
497 uint32_t cost = fn( tt );
498 if ( cost < best_cost )
499 {
500 local_best_tt = tt;
501 best_cost = cost;
502 for ( uint32_t i = 0; i < num_vars; ++i )
503 {
504 bestPerm[i] = pComb[i];
505 }
506
507 if ( best_cost <= bail_multiplicity )
508 {
509 break;
510 }
511 }
512 } while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) );
513
514 std::array<uint32_t, max_num_vars> res_perm;
515
516 if ( best_cost > ( 1 << ( ps.lut_size - free_set_size ) ) )
517 {
518 return std::make_tuple( local_best_tt, res_perm, UINT32_MAX );
519 }
520
521 for ( uint32_t i = 0; i < num_vars; ++i )
522 {
523 res_perm[i] = permutations[bestPerm[i]];
524 }
525
526 return std::make_tuple( local_best_tt, res_perm, best_cost );
527 }
528
529 inline std::tuple<STT, std::array<uint32_t, max_num_vars>, uint32_t> enumerate_iset_combinations2( uint32_t free_set_size, uint32_t offset )
530 {
531 STT tt = best_tt;
532
533 /* TT with best cost */
534 STT local_best_tt = tt;
535 uint32_t best_cost = ( 1 << ( ps.lut_size - free_set_size ) ) + 1;
536
537 assert( free_set_size >= offset );
538
539 /* init combinations */
540 uint32_t pComb[16], pInvPerm[16];
541 for ( uint32_t i = 0; i < num_vars; ++i )
542 {
543 pComb[i] = pInvPerm[i] = i;
544 }
545
546 /* enumerate combinations */
547 std::array<uint32_t, max_num_vars> res_perm;
548
549 do
550 {
551 uint32_t cost = column_multiplicity2( tt, free_set_size );
552 if ( cost <= 2 )
553 {
554 local_best_tt = tt;
555 best_cost = cost;
556 for ( uint32_t i = 0; i < num_vars; ++i )
557 {
558 res_perm[i] = permutations[pComb[i]];
559 }
560 return std::make_tuple( local_best_tt, res_perm, best_cost );
561 }
562 } while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) );
563
564 return std::make_tuple( local_best_tt, res_perm, UINT32_MAX );
565 }
566
567 std::vector<STT> compute_isets( bool verbose = false )
568 {
569 /* construct isets involved in multiplicity */
570 uint32_t isets_support = num_vars - best_free_set;
571 std::vector<STT> isets( best_multiplicity );
572
573 /* construct isets */
574 std::unordered_map<uint64_t, uint32_t> column_to_iset;
575 STT tt = best_tt;
576 uint32_t offset = 0;
577 uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
578 uint64_t constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF };
579
580 auto it = std::begin( tt );
581 for ( auto i = 0u; i < num_blocks; ++i )
582 {
583 for ( auto j = 0; j < ( 64 >> best_free_set ); ++j )
584 {
585 uint64_t val = *it & masks[best_free_set];
586
587 auto el = column_to_iset.find( val );
588 if ( el != column_to_iset.end() )
589 {
590 isets[el->second]._bits[i / ( 1u << best_free_set )] |= UINT64_C( 1 ) << ( j + offset );
591 }
592 else
593 {
594 isets[column_to_iset.size()]._bits[i / ( 1u << best_free_set )] |= UINT64_C( 1 ) << ( j + offset );
595 column_to_iset[val] = column_to_iset.size();
596 }
597
598 *it >>= ( 1u << best_free_set );
599 }
600
601 offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F;
602 ++it;
603 }
604
605 /* extend isets to cover the whole truth table */
606 for ( STT& iset : isets )
607 {
608 local_extend_to( iset, isets_support );
609 }
610
611 /* save free_set functions */
612 std::vector<STT> free_set_tts( best_multiplicity );
613
614 for ( auto const& pair : column_to_iset )
615 {
616 free_set_tts[pair.second]._bits[0] = pair.first;
617 local_extend_to( free_set_tts[pair.second], best_free_set );
618 }
619
620 /* print isets and free set*/
621 if ( verbose )
622 {
623 std::cout << "iSets\n";
624 uint32_t i = 0;
625 for ( auto iset : isets )
626 {
627 kitty::print_hex( iset );
628 std::cout << " of func ";
629 kitty::print_hex( free_set_tts[i++] );
630 std::cout << "\n";
631 }
632 }
633
634 best_free_set_tts = std::move( free_set_tts );
635
636 return isets;
637 }
638
639 void generate_decomposition()
640 {
641 dec_result.clear();
642
643 uint32_t num_edges = 0;
644 for ( uint32_t i = 0; i < best_bound_sets.size(); ++i )
645 {
646 ac_decomposition_result dec;
647 auto tt = best_bound_sets[i];
648 auto care = best_care_sets[i];
649
650 /* compute and minimize support for bound set variables */
651 uint32_t k = 0;
652 for ( uint32_t j = 0; j < num_vars - best_free_set; ++j )
653 {
654 if ( !kitty::has_var( tt, care, j ) )
655 {
656 /* fix truth table */
657 adjust_truth_table_on_dc( tt, care, tt.num_vars(), j );
658 continue;
659 }
660
661 if ( k < j )
662 {
663 kitty::swap_inplace( tt, k, j );
664 kitty::swap_inplace( care, k, j );
665 }
666 dec.support.push_back( permutations[best_free_set + j] );
667 ++k;
668 }
669
670 dec.tt = kitty::shrink_to( tt, dec.support.size() );
671 dec_result.push_back( dec );
672 num_edges += dec.support.size() > 1 ? dec.support.size() : 0;
673 }
674
675 /* compute the decomposition for the top-level LUT */
676 compute_top_lut_decomposition();
677
678 if ( pst )
679 {
680 pst->num_luts = dec_result.size();
681 pst->num_edges = num_edges + dec_result.back().support.size();
682 }
683 }
684
685 void compute_top_lut_decomposition()
686 {
687 uint32_t top_vars = best_bound_sets.size() + best_free_set;
688 assert( top_vars <= ps.lut_size );
689
690 /* extend bound set functions with free_set_size LSB vars */
691 kitty::dynamic_truth_table tt( top_vars );
692
693 /* compute support */
694 dec_result.emplace_back();
695 for ( uint32_t i = 0; i < best_free_set; ++i )
696 {
697 dec_result.back().support.push_back( permutations[i] );
698 }
699
700 /* create functions for bound set */
701 std::vector<kitty::dynamic_truth_table> bound_set_vars;
702 auto res_it = dec_result.begin();
703 uint32_t offset = 0;
704 for ( uint32_t i = 0; i < best_bound_sets.size(); ++i )
705 {
706 bound_set_vars.emplace_back( top_vars );
707 kitty::create_nth_var( bound_set_vars[i], best_free_set + i );
708
709 /* add bound-set variables to the support, remove buffers (shared set) */
710 if ( res_it->support.size() == 1 )
711 {
712 dec_result.back().support.push_back( res_it->support.front() );
713 /* it is a NOT */
714 if ( ( res_it->tt._bits[0] & 1 ) == 1 )
715 {
716 bound_set_vars[i] = ~bound_set_vars[i];
717 }
718 dec_result.erase( res_it );
719 ++offset;
720 }
721 else
722 {
723 dec_result.back().support.push_back( num_vars + i - offset );
724 ++res_it;
725 }
726 }
727
728 /* create composition function */
729 for ( uint32_t i = 0; i < best_free_set_tts.size(); ++i )
730 {
731 kitty::dynamic_truth_table free_set_tt = kitty::shrink_to( best_free_set_tts[i], top_vars );
732
733 /* find MUX assignments */
734 for ( uint32_t j = 0; j < bound_set_vars.size(); ++j )
735 {
736 /* AND with ONSET or OFFSET */
737 if ( ( ( best_iset_onset[j] >> i ) & 1 ) )
738 {
739 free_set_tt &= bound_set_vars[j];
740 }
741 else if ( ( ( best_iset_offset[j] >> i ) & 1 ) )
742 {
743 free_set_tt &= ~bound_set_vars[j];
744 }
745 }
746
747 tt |= free_set_tt;
748 }
749
750 /* add top-level LUT to result */
751 dec_result.back().tt = tt;
752 }
753
754 inline void reposition_late_arriving_variables( unsigned delay_profile, uint32_t late_arriving )
755 {
756 uint32_t k = 0;
757 for ( uint32_t i = 0; i < late_arriving; ++i )
758 {
759 while ( ( ( delay_profile >> k ) & 1 ) == 0 )
760 ++k;
761
762 if ( permutations[i] == k )
763 {
764 ++k;
765 continue;
766 }
767
768 std::swap( permutations[i], permutations[k] );
769 swap_inplace_local( best_tt, i, k );
770 ++k;
771 }
772 }
773
774 template<class Iterator>
775 void print_perm( Iterator begin, uint32_t free_set )
776 {
777 std::cout << "[";
778 for ( uint32_t i = 0; i < num_vars; ++i )
779 {
780 if ( i == free_set )
781 {
782 std::cout << ", ";
783 }
784 std::cout << *begin << " ";
785 ++begin;
786 }
787 std::cout << "]\n";
788 }
789
790 void generate_support_minimization_encodings()
791 {
792 uint32_t count = 0;
793
794 /* enable don't cares only if not a power of 2 */
795 uint32_t num_combs = 2;
796 if ( __builtin_popcount( best_multiplicity ) == 1 )
797 {
798 uint32_t num_combs_exact[4] = { 1, 3, 35, 6435 };
799 for ( uint32_t i = 0; i < 4; ++i )
800 {
801 if ( ( best_multiplicity >> i ) == 2u )
802 {
803 num_combs = num_combs_exact[i];
804 }
805 }
806 support_minimization_encodings = std::vector<std::array<uint32_t, 2>>( num_combs );
807 generate_support_minimization_encodings_rec<false, true>( 0, 0, 0, count, best_multiplicity >> 1, true );
808 assert( count == num_combs );
809 return;
810 }
811
812 /* constraint the number of offset classes for a strict encoding */
813 int32_t min_set_size = 1;
814 if ( best_multiplicity <= 4 )
815 min_set_size = 2;
816 else if ( best_multiplicity <= 8 )
817 min_set_size = 4;
818 else
819 min_set_size = 8;
820 min_set_size = best_multiplicity - min_set_size;
821
822 if ( best_multiplicity > 8 )
823 {
824 /* distinct elements in 2 indistinct bins with at least `min_set_size` elements in the indistinct bins */
825 uint32_t class_sizes[13] = { 3, 3, 15, 25, 35, 35, 255, 501, 957, 1749, 3003, 4719, 6435 };
826 num_combs = class_sizes[best_multiplicity - 3];
827 support_minimization_encodings = std::vector<std::array<uint32_t, 2>>( num_combs );
828 generate_support_minimization_encodings_rec<false, false>( 0, 0, 0, count, min_set_size, true );
829 }
830 else
831 {
832 /* distinct elements in 3 bins, of which 2 are indistinct, and with at least `min_set_size` elements in the indistinct bins */
833 uint32_t class_sizes[13] = { 6, 3, 90, 130, 105, 35, 9330, 23436, 48708, 78474, 91377, 70785, 32175 };
834 num_combs = class_sizes[best_multiplicity - 3];
835 support_minimization_encodings = std::vector<std::array<uint32_t, 2>>( num_combs );
836 generate_support_minimization_encodings_rec<true, false>( 0, 0, 0, count, min_set_size, true );
837 }
838
839 assert( count == num_combs );
840 }
841
842 template<bool enable_dcset, bool equal_size_partition>
843 void generate_support_minimization_encodings_rec( uint32_t onset, uint32_t offset, uint32_t var, uint32_t& count, int32_t min_set_size, bool first )
844 {
845 if ( var == best_multiplicity )
846 {
847 if ( equal_size_partition )
848 {
849 /* sets must be equally populated */
850 if ( __builtin_popcount( onset ) != __builtin_popcount( offset ) )
851 {
852 return;
853 }
854 }
855 else if ( __builtin_popcount( onset ) < min_set_size || __builtin_popcount( offset ) < min_set_size )
856 {
857 /* ON-set and OFF-set must be populated with at least min_set_size elements */
858 return;
859 }
860
861 support_minimization_encodings[count][0] = onset;
862 support_minimization_encodings[count][1] = offset;
863 ++count;
864 return;
865 }
866
867 /* var in DCSET */
868 if ( enable_dcset )
869 {
870 generate_support_minimization_encodings_rec<enable_dcset, equal_size_partition>( onset, offset, var + 1, count, min_set_size, first );
871 }
872
873 /* move var in ONSET */
874 onset |= 1 << var;
875 generate_support_minimization_encodings_rec<enable_dcset, equal_size_partition>( onset, offset, var + 1, count, min_set_size, false );
876 onset &= ~( 1 << var );
877
878 /* remove symmetries */
879 if ( first )
880 {
881 return;
882 }
883
884 /* move var in OFFSET */
885 offset |= 1 << var;
886 generate_support_minimization_encodings_rec<enable_dcset, equal_size_partition>( onset, offset, var + 1, count, min_set_size, false );
887 offset &= ~( 1 << var );
888 }
889
890 void solve_min_support_exact( std::vector<STT> const& isets )
891 {
892 std::vector<encoding_column> matrix;
893 matrix.reserve( support_minimization_encodings.size() );
894 best_bound_sets.clear();
895
896 /* create covering matrix */
897 if ( !create_covering_matrix<false>( isets, matrix, false ) )
898 {
899 return;
900 }
901
902 /* solve the covering problem */
903 std::array<uint32_t, 6> solution = covering_solve_exact( matrix );
904
905 /* check for failed decomposition */
906 if ( solution[0] == UINT32_MAX )
907 {
908 return;
909 }
910
911 /* compute best bound sets */
912 uint32_t num_luts = 1 + solution[5];
913 uint32_t num_levels = 2;
914 uint32_t num_edges = best_free_set + solution[5];
915 uint32_t isets_support = num_vars - best_free_set;
916 best_care_sets.clear();
917 best_iset_onset.clear();
918 best_iset_offset.clear();
919 for ( uint32_t i = 0; i < solution[5]; ++i )
920 {
921 STT tt;
922 STT care;
923
924 const uint32_t onset = support_minimization_encodings[matrix[solution[i]].index][0];
925 const uint32_t offset = support_minimization_encodings[matrix[solution[i]].index][1];
926 for ( uint32_t j = 0; j < best_multiplicity; ++j )
927 {
928 if ( ( ( onset >> j ) & 1 ) )
929 {
930 tt |= isets[j];
931 }
932 if ( ( ( offset >> j ) & 1 ) )
933 {
934 care |= isets[j];
935 }
936 }
937
938 care |= tt;
939 num_edges += matrix[solution[i]].cost & ( ( 1 << isets_support ) - 1 );
940
941 best_bound_sets.push_back( tt );
942 best_care_sets.push_back( care );
943 best_iset_onset.push_back( onset );
944 best_iset_offset.push_back( offset );
945 }
946
947 if ( pst )
948 {
949 pst->num_luts = num_luts;
950 pst->num_levels = num_levels;
951 pst->num_edges = num_edges;
952 }
953 }
954
955 void solve_min_support_heuristic( std::vector<STT> const& isets )
956 {
957 std::vector<encoding_column> matrix;
958 matrix.reserve( support_minimization_encodings.size() );
959 best_bound_sets.clear();
960
961 /* create covering matrix */
962 if ( !create_covering_matrix<true>( isets, matrix, true ) )
963 {
964 return;
965 }
966
967 /* solve the covering problem: heuristic pass + local search */
968 std::array<uint32_t, 6> solution = covering_solve_heuristic( matrix );
969
970 /* check for failed decomposition */
971 if ( solution[0] == UINT32_MAX )
972 {
973 return;
974 }
975
976 /* improve solution with local search */
977 while ( covering_improve( matrix, solution ) )
978 ;
979
980 /* compute best bound sets */
981 uint32_t num_luts = 1 + solution[5];
982 uint32_t num_levels = 2;
983 uint32_t num_edges = best_free_set + solution[5];
984 uint32_t isets_support = num_vars - best_free_set;
985 best_care_sets.clear();
986 best_iset_onset.clear();
987 best_iset_offset.clear();
988 for ( uint32_t i = 0; i < solution[5]; ++i )
989 {
990 STT tt;
991 STT care;
992
993 const uint32_t onset = support_minimization_encodings[matrix[solution[i]].index][0];
994 const uint32_t offset = support_minimization_encodings[matrix[solution[i]].index][1];
995 for ( uint32_t j = 0; j < best_multiplicity; ++j )
996 {
997 if ( ( ( onset >> j ) & 1 ) )
998 {
999 tt |= isets[j];
1000 }
1001 if ( ( ( offset >> j ) & 1 ) )
1002 {
1003 care |= isets[j];
1004 }
1005 }
1006
1007 care |= tt;
1008 num_edges += matrix[solution[i]].cost & ( ( 1 << isets_support ) - 1 );
1009
1010 best_bound_sets.push_back( tt );
1011 best_care_sets.push_back( care );
1012 best_iset_onset.push_back( onset );
1013 best_iset_offset.push_back( offset );
1014 }
1015
1016 if ( pst )
1017 {
1018 pst->num_luts = num_luts;
1019 pst->num_levels = num_levels;
1020 pst->num_edges = num_edges;
1021 }
1022 }
1023
1024 template<bool UseHeuristic>
1025 bool create_covering_matrix( std::vector<STT> const& isets, std::vector<encoding_column>& matrix, bool sort )
1026 {
1027 assert( best_multiplicity <= 16 );
1028 uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2;
1029 uint32_t iset_support = num_vars - best_free_set;
1030
1031 /* insert dichotomies */
1032 for ( uint32_t i = 0; i < support_minimization_encodings.size(); ++i )
1033 {
1034 uint32_t const onset = support_minimization_encodings[i][0];
1035 uint32_t const offset = support_minimization_encodings[i][1];
1036
1037 /* compute function and distinguishable seed dichotomies */
1038 uint64_t column[2] = { 0, 0 };
1039 STT tt;
1040 STT care;
1041 uint32_t pair_pointer = 0;
1042 for ( uint32_t j = 0; j < best_multiplicity; ++j )
1043 {
1044 auto onset_shift = ( onset >> j );
1045 auto offset_shift = ( offset >> j );
1046 if ( ( onset_shift & 1 ) )
1047 {
1048 tt |= isets[j];
1049 }
1050
1051 if ( ( offset_shift & 1 ) )
1052 {
1053 care |= isets[j];
1054 }
1055
1056 /* compute included seed dichotomies */
1057 for ( uint32_t k = j + 1; k < best_multiplicity; ++k )
1058 {
1059 /* if are in diffent sets */
1060 if ( ( ( ( onset_shift & ( offset >> k ) ) | ( ( onset >> k ) & offset_shift ) ) & 1 ) )
1061 {
1062 column[pair_pointer >> 6u] |= UINT64_C( 1 ) << ( pair_pointer & 0x3F );
1063 }
1064
1065 ++pair_pointer;
1066 }
1067 }
1068
1069 care |= tt;
1070
1071 /* compute cost */
1072 uint32_t cost = 0;
1073 for ( uint32_t j = 0; j < iset_support; ++j )
1074 {
1075 cost += has_var_support( tt, care, iset_support, j ) ? 1 : 0;
1076 // if ( !has_var_support( tt, care, iset_support, j ) )
1077 // {
1078 // /* adjust truth table and care set */
1079 // adjust_truth_table_on_dc( tt, care, iset_support, j );
1080 // continue;
1081 // }
1082 // ++cost;
1083 }
1084
1085 /* discard solutions with support over LUT size */
1086 if ( cost > ps.lut_size )
1087 continue;
1088
1089 /* buffers have zero cost */
1090 if ( cost == 1 )
1091 cost = 0;
1092
1093 float sort_cost = 0;
1094 if ( UseHeuristic )
1095 {
1096 sort_cost = 1.0f / ( __builtin_popcountl( column[0] ) + __builtin_popcountl( column[1] ) );
1097 }
1098 else
1099 {
1100 sort_cost = cost + ( ( combinations - __builtin_popcountl( column[0] + __builtin_popcountl( column[1] ) ) ) << num_vars );
1101 }
1102
1103 /* insert */
1104 matrix.emplace_back( encoding_column{ { column[0], column[1] }, cost, i, sort_cost } );
1105 }
1106
1107 if ( !sort )
1108 {
1109 return true;
1110 }
1111
1112 if ( UseHeuristic )
1113 {
1114 std::sort( matrix.begin(), matrix.end(), [&]( encoding_column const& a, encoding_column const& b ) {
1115 return a.cost < b.cost;
1116 } );
1117 }
1118 else
1119 {
1120 std::sort( matrix.begin(), matrix.end(), [&]( encoding_column const& a, encoding_column const& b ) {
1121 return a.sort_cost < b.sort_cost;
1122 } );
1123 }
1124
1125 return true;
1126 }
1127
1128 std::array<uint32_t, 6> covering_solve_exact( std::vector<encoding_column>& matrix )
1129 {
1130 /* last value of res contains the size of the bound set */
1131 std::array<uint32_t, 6> res = { UINT32_MAX };
1132 uint32_t best_cost = UINT32_MAX;
1133 uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2;
1134
1135 assert( best_multiplicity <= 4 );
1136
1137 /* determine the number of needed loops*/
1138 if ( best_multiplicity <= 2 )
1139 {
1140 res[5] = 1;
1141 res[0] = 0;
1142 }
1143 else if ( best_multiplicity <= 4 )
1144 {
1145 res[5] = 2;
1146 for ( uint32_t i = 0; i < matrix.size() - 1; ++i )
1147 {
1148 for ( uint32_t j = 1; j < matrix.size(); ++j )
1149 {
1150 /* filter by cost */
1151 if ( matrix[i].cost + matrix[j].cost >= best_cost )
1152 continue;
1153
1154 /* check validity */
1155 if ( __builtin_popcountl( matrix[i].column[0] | matrix[j].column[0] ) + __builtin_popcountl( matrix[i].column[1] | matrix[j].column[1] ) == combinations )
1156 {
1157 res[0] = i;
1158 res[1] = j;
1159 best_cost = matrix[i].cost + matrix[j].cost;
1160 }
1161 }
1162 }
1163 }
1164
1165 return res;
1166 }
1167
1168 std::array<uint32_t, 6> covering_solve_heuristic( std::vector<encoding_column>& matrix )
1169 {
1170 /* last value of res contains the size of the bound set */
1171 std::array<uint32_t, 6> res = { UINT32_MAX };
1172 uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2;
1173 uint64_t column0 = 0, column1 = 0;
1174
1175 uint32_t best = 0;
1176 float best_cost = std::numeric_limits<float>::max();
1177 for ( uint32_t i = 0; i < matrix.size(); ++i )
1178 {
1179 if ( matrix[i].sort_cost < best_cost )
1180 {
1181 best = i;
1182 best_cost = matrix[i].sort_cost;
1183 }
1184 }
1185
1186 /* select */
1187 column0 = matrix[best].column[0];
1188 column1 = matrix[best].column[1];
1189 std::swap( matrix[0], matrix[best] );
1190
1191 /* get max number of BS's */
1192 uint32_t iter = 1;
1193
1194 while ( iter < ps.lut_size - best_free_set && __builtin_popcountl( column0 ) + __builtin_popcountl( column1 ) != combinations )
1195 {
1196 /* select column that minimizes the cost */
1197 best = 0;
1198 best_cost = std::numeric_limits<float>::max();
1199 for ( uint32_t i = iter; i < matrix.size(); ++i )
1200 {
1201 float local_cost = 1.0f / ( __builtin_popcountl( matrix[i].column[0] & ~column0 ) + __builtin_popcountl( matrix[i].column[1] & ~column1 ) );
1202 if ( local_cost < best_cost )
1203 {
1204 best = i;
1205 best_cost = local_cost;
1206 }
1207 }
1208
1209 column0 |= matrix[best].column[0];
1210 column1 |= matrix[best].column[1];
1211 std::swap( matrix[iter], matrix[best] );
1212 ++iter;
1213 }
1214
1215 if ( __builtin_popcountl( column0 ) + __builtin_popcountl( column1 ) == combinations )
1216 {
1217 for ( uint32_t i = 0; i < iter; ++i )
1218 {
1219 res[i] = i;
1220 }
1221 res[5] = iter;
1222 }
1223
1224 return res;
1225 }
1226
1227 bool covering_improve( std::vector<encoding_column> const& matrix, std::array<uint32_t, 6>& solution )
1228 {
1229 /* performs one iteration of local search */
1230 uint32_t best_cost = 0, local_cost = 0;
1231 uint32_t num_elements = solution[5];
1232 uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2;
1233 bool improved = false;
1234
1235 /* compute current cost */
1236 for ( uint32_t i = 0; i < num_elements; ++i )
1237 {
1238 best_cost += matrix[solution[i]].cost;
1239 }
1240
1241 uint64_t column0, column1;
1242 for ( uint32_t i = 0; i < num_elements; ++i )
1243 {
1244 /* remove element i */
1245 local_cost = 0;
1246 column0 = 0;
1247 column1 = 0;
1248 for ( uint32_t j = 0; j < num_elements; ++j )
1249 {
1250 if ( j == i )
1251 continue;
1252 local_cost += matrix[solution[j]].cost;
1253 column0 |= matrix[solution[j]].column[0];
1254 column1 |= matrix[solution[j]].column[1];
1255 }
1256
1257 /* search for a better replecemnts */
1258 for ( uint32_t j = 0; j < matrix.size(); ++j )
1259 {
1260 if ( __builtin_popcount( column0 | matrix[j].column[0] ) + __builtin_popcount( column1 | matrix[j].column[1] ) != combinations )
1261 continue;
1262 if ( local_cost + matrix[j].cost < best_cost )
1263 {
1264 solution[i] = j;
1265 best_cost = local_cost + matrix[j].cost;
1266 improved = true;
1267 }
1268 }
1269 }
1270
1271 return improved;
1272 }
1273
1274 void adjust_truth_table_on_dc( STT& tt, STT& care, uint32_t real_num_vars, uint32_t var_index )
1275 {
1276 assert( var_index < real_num_vars );
1277 assert( tt.num_vars() == care.num_vars() );
1278
1279 const uint32_t num_blocks = real_num_vars <= 6 ? 1 : ( 1 << ( real_num_vars - 6 ) );
1280 if ( real_num_vars <= 6 || var_index < 6 )
1281 {
1282 auto it_tt = std::begin( tt._bits );
1283 auto it_care = std::begin( care._bits );
1284 while ( it_tt != std::begin( tt._bits ) + num_blocks )
1285 {
1286 uint64_t new_bits = *it_tt & *it_care;
1287 *it_tt = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) |
1288 ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] );
1289 *it_care = ( *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index];
1290 *it_care = *it_care | ( *it_care << ( uint64_t( 1 ) << var_index ) );
1291
1292 ++it_tt;
1293 ++it_care;
1294 }
1295 return;
1296 }
1297
1298 const auto step = 1 << ( var_index - 6 );
1299 for ( auto i = 0u; i < static_cast<uint32_t>( num_blocks ); i += 2 * step )
1300 {
1301 for ( auto j = 0; j < step; ++j )
1302 {
1303 tt._bits[i + j] = ( tt._bits[i + j] & care._bits[i + j] ) | ( tt._bits[i + j + step] & care._bits[i + j + step] );
1304 tt._bits[i + j + step] = tt._bits[i + j];
1305 care._bits[i + j] = care._bits[i + j] | care._bits[i + j + step];
1306 care._bits[i + j + step] = care._bits[i + j];
1307 }
1308 }
1309 }
1310
1311 void local_extend_to( STT& tt, uint32_t real_num_vars )
1312 {
1313 if ( real_num_vars < 6 )
1314 {
1315 auto mask = *tt.begin();
1316
1317 for ( auto i = real_num_vars; i < num_vars; ++i )
1318 {
1319 mask |= ( mask << ( 1 << i ) );
1320 }
1321
1322 std::fill( tt.begin(), tt.end(), mask );
1323 }
1324 else
1325 {
1326 uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) );
1327 auto it = tt.begin();
1328 while ( it != tt.end() )
1329 {
1330 it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it );
1331 }
1332 }
1333 }
1334
1335 bool has_var_support( const STT& tt, const STT& care, uint32_t real_num_vars, uint8_t var_index )
1336 {
1337 assert( var_index < real_num_vars );
1338 assert( real_num_vars <= tt.num_vars() );
1339 assert( tt.num_vars() == care.num_vars() );
1340
1341 const uint32_t num_blocks = real_num_vars <= 6 ? 1 : ( 1 << ( real_num_vars - 6 ) );
1342 if ( real_num_vars <= 6 || var_index < 6 )
1343 {
1344 auto it_tt = std::begin( tt._bits );
1345 auto it_care = std::begin( care._bits );
1346 while ( it_tt != std::begin( tt._bits ) + num_blocks )
1347 {
1348 if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 )
1349 {
1350 return true;
1351 }
1352 ++it_tt;
1353 ++it_care;
1354 }
1355
1356 return false;
1357 }
1358
1359 const auto step = 1 << ( var_index - 6 );
1360 for ( auto i = 0u; i < num_blocks; i += 2 * step )
1361 {
1362 for ( auto j = 0; j < step; ++j )
1363 {
1364 if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 )
1365 {
1366 return true;
1367 }
1368 }
1369 }
1370
1371 return false;
1372 }
1373
1374 void swap_inplace_local( STT& tt, uint8_t var_index1, uint8_t var_index2 )
1375 {
1376 if ( var_index1 == var_index2 )
1377 {
1378 return;
1379 }
1380
1381 if ( var_index1 > var_index2 )
1382 {
1383 std::swap( var_index1, var_index2 );
1384 }
1385
1386 const uint32_t num_blocks = num_vars <= 6 ? 1 : 1 << ( num_vars - 6 );
1387
1388 if ( num_vars <= 6 )
1389 {
1390 const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2];
1391 const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 );
1392 tt._bits[0] = ( tt._bits[0] & pmask[0] ) | ( ( tt._bits[0] & pmask[1] ) << shift ) | ( ( tt._bits[0] & pmask[2] ) >> shift );
1393 }
1394 else if ( var_index2 <= 5 )
1395 {
1396 const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2];
1397 const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 );
1398 std::transform( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, std::begin( tt._bits ),
1399 [shift, &pmask]( uint64_t word ) {
1400 return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift );
1401 } );
1402 }
1403 else if ( var_index1 <= 5 ) /* in this case, var_index2 > 5 */
1404 {
1405 const auto step = 1 << ( var_index2 - 6 );
1406 const auto shift = 1 << var_index1;
1407 auto it = std::begin( tt._bits );
1408 while ( it != std::begin( tt._bits ) + num_blocks )
1409 {
1410 for ( auto i = decltype( step ){ 0 }; i < step; ++i )
1411 {
1412 const auto low_to_high = ( *( it + i ) & kitty::detail::projections[var_index1] ) >> shift;
1413 const auto high_to_low = ( *( it + i + step ) << shift ) & kitty::detail::projections[var_index1];
1414 *( it + i ) = ( *( it + i ) & ~kitty::detail::projections[var_index1] ) | high_to_low;
1415 *( it + i + step ) = ( *( it + i + step ) & kitty::detail::projections[var_index1] ) | low_to_high;
1416 }
1417 it += 2 * step;
1418 }
1419 }
1420 else
1421 {
1422 const auto step1 = 1 << ( var_index1 - 6 );
1423 const auto step2 = 1 << ( var_index2 - 6 );
1424 auto it = std::begin( tt._bits );
1425 while ( it != std::begin( tt._bits ) + num_blocks )
1426 {
1427 for ( auto i = 0; i < step2; i += 2 * step1 )
1428 {
1429 for ( auto j = 0; j < step1; ++j )
1430 {
1431 std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) );
1432 }
1433 }
1434 it += 2 * step2;
1435 }
1436 }
1437 }
1438
1439 /* Decomposition format for ABC
1440 *
1441 * The record is an array of unsigned chars where:
1442 * - the first unsigned char entry stores the number of unsigned chars in the record
1443 * - the second entry stores the number of LUTs
1444 * After this, several sub-records follow, each representing one LUT as follows:
1445 * - an unsigned char entry listing the number of fanins
1446 * - a list of fanins, from the LSB to the MSB of the truth table. The N inputs of the original function
1447 * have indexes from 0 to N-1, followed by the internal signals in a topological order
1448 * - the LUT truth table occupying 2^(M-3) bytes, where M is the fanin count of the LUT, from the LSB to the MSB.
1449 * A 2-input LUT, which takes 4 bits, should be stretched to occupy 8 bits (one unsigned char)
1450 * A 0- or 1-input LUT can be represented similarly but it is not expected that such LUTs will be represented
1451 */
1452 void get_decomposition_abc( unsigned char* decompArray )
1453 {
1454 unsigned char* pArray = decompArray;
1455 unsigned char bytes = 2;
1456
1457 /* write number of LUTs */
1458 pArray++;
1459 *pArray++ = dec_result.size();
1460
1461 /* write LUTs */
1462 for ( ac_decomposition_result const& lut : dec_result )
1463 {
1464 /* write fanin size*/
1465 *pArray++ = lut.support.size();
1466 ++bytes;
1467
1468 /* write support */
1469 for ( uint32_t i : lut.support )
1470 {
1471 *pArray++ = (unsigned char)i;
1472 ++bytes;
1473 }
1474
1475 /* write truth table */
1476 uint32_t tt_num_bytes = ( lut.tt.num_vars() <= 3 ) ? 1 : ( 1 << ( lut.tt.num_vars() - 3 ) );
1477 tt_num_bytes = std::min( tt_num_bytes, 8u );
1478 for ( uint32_t i = 0; i < lut.tt.num_blocks(); ++i )
1479 {
1480 for ( uint32_t j = 0; j < tt_num_bytes; ++j )
1481 {
1482 *pArray++ = (unsigned char)( ( lut.tt._bits[i] >> ( 8 * j ) ) & 0xFF );
1483 ++bytes;
1484 }
1485 }
1486 }
1487
1488 /* write numBytes */
1489 *decompArray = bytes;
1490 }
1491
1492private:
1493 uint32_t best_multiplicity{ UINT32_MAX };
1494 uint32_t best_free_set{ UINT32_MAX };
1495 STT best_tt;
1496 std::vector<STT> best_bound_sets;
1497 std::vector<STT> best_care_sets;
1498 std::vector<STT> best_free_set_tts;
1499 std::vector<uint64_t> best_iset_onset;
1500 std::vector<uint64_t> best_iset_offset;
1501 std::vector<ac_decomposition_result> dec_result;
1502
1503 std::vector<std::array<uint32_t, 2>> support_minimization_encodings;
1504
1505 uint32_t num_vars;
1506 ac_decomposition_params ps;
1507 ac_decomposition_stats* pst;
1508 std::array<uint32_t, max_num_vars> permutations;
1509};
1510
1511} // namespace acd
1512
1514
1515#endif // _ACD_H_
unsigned int uint32_t
Definition Fxch.h:32
ABC_NAMESPACE_HEADER_START typedef unsigned char uint8_t
Definition Fxch.h:31
#define ABC_NAMESPACE_CXX_HEADER_START
#define ABC_NAMESPACE_CXX_HEADER_END
#define UINT32_MAX
Definition pstdint.h:388
void get_decomposition(unsigned char *decompArray)
int run(word *ptt, unsigned delay_profile)
Runs ACD using late arriving variables.
ac_decomposition_impl(uint32_t num_vars, ac_decomposition_params const &ps, ac_decomposition_stats *pst=nullptr)
unsigned __int64 word
DECLARATIONS ///.
Definition kitPerm.c:36
unsigned long long size
Definition giaNewBdd.h:39
unsigned short var
Definition giaNewBdd.h:35
void create_nth_var(TT &tt, uint8_t var_index, bool complement=false)
Constructs projections (single-variable functions)
bool has_var(const TT &tt, uint8_t var_index)
Checks whether truth table depends on given variable index.
void swap_inplace(TT &tt, uint8_t var_index1, uint8_t var_index2)
Swaps two variables in a truth table.
void print_hex(const TT &tt, std::ostream &os=std::cout)
Prints truth table in hexadecimal representation.
dynamic_truth_table shrink_to(const TTFrom &from, unsigned num_vars)
Shrinks larger truth table to smaller dynamic one.
Parameters for ac_decomposition.
bool use_first
Use the first feasible decomposition found.
bool support_reducing_only
Perform only support reducing (2-level) decompositions.
bool try_no_late_arrival
If decomposition with delay profile fails, try without.
uint32_t max_free_set_vars
Maximum size of the free set (1 < num < 6).
uint32_t lut_size
LUT size for decomposition (3 < num < 7).
kitty::dynamic_truth_table tt
std::vector< uint32_t > support
Statistics for ac_decomposition.
#define assert(ex)
Definition util_old.h:213