89 struct encoding_column
98 static constexpr uint32_t max_num_vars = 11;
103 : num_vars( num_vars ), ps( ps ), pst( pst )
105 std::iota( permutations.begin(), permutations.end(), 0 );
112 if ( num_vars > max_num_vars )
117 uint32_t late_arriving = __builtin_popcount( delay_profile );
120 if ( num_vars > ps.max_free_set_vars + ps.lut_size )
122 ps.max_free_set_vars = num_vars - ps.lut_size;
124 if ( late_arriving > ps.max_free_set_vars )
131 if ( late_arriving > ps.lut_size - 1 )
137 init_truth_table( ptt );
140 reposition_late_arriving_variables( delay_profile, late_arriving );
143 if ( !find_decomposition( delay_profile, late_arriving ) )
149 return delay_profile == 0 ? 2 : 1;
158 std::vector<STT> isets = compute_isets();
160 generate_support_minimization_encodings();
163 if ( best_multiplicity <= 4u )
164 solve_min_support_exact( isets );
166 solve_min_support_heuristic( isets );
169 assert( !best_bound_sets.empty() );
176 unsigned profile = 0;
178 if ( best_free_set > num_vars )
181 for (
uint32_t i = 0; i < best_free_set; ++i )
183 profile |= 1 << permutations[i];
191 if ( best_free_set > num_vars )
194 generate_decomposition();
195 get_decomposition_abc( decompArray );
199 bool find_decomposition(
unsigned& delay_profile,
uint32_t late_arriving )
205 uint32_t start = std::max( offset, 1u );
210 start = std::max( start, num_vars - ps.
lut_size );
214 std::function<
uint32_t( STT
const& tt )> column_multiplicity_fn[5] = {
215 [
this]( STT
const& tt ) {
return column_multiplicity<1u>( tt ); },
216 [
this]( STT
const& tt ) {
return column_multiplicity<2u>( tt ); },
217 [
this]( STT
const& tt ) {
return column_multiplicity<3u>( tt ); },
218 [
this]( STT
const& tt ) {
return column_multiplicity5<4u>( tt ); },
219 [
this]( STT
const& tt ) {
return column_multiplicity5<5u>( tt ); } };
223 for (
uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i )
225 auto ret_tuple = enumerate_iset_combinations( i, offset, column_multiplicity_fn[i - 1] );
226 uint32_t multiplicity = std::get<2>( ret_tuple );
229 uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0;
232 if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 )
234 best_tt = std::get<0>( ret_tuple );
235 permutations = std::get<1>( ret_tuple );
236 best_multiplicity = multiplicity;
237 best_cost = multiplicity + additional_cost;
240 if ( !ps.use_first && multiplicity > 2 )
249 if ( best_multiplicity ==
UINT32_MAX && ( !ps.try_no_late_arrival || late_arriving == 0 ) )
256 if ( ps.support_reducing_only )
258 start = std::max( 1u, num_vars - ps.lut_size );
261 for (
uint32_t i = start; i <= ps.lut_size - 1 && i <= ps.max_free_set_vars; ++i )
263 auto ret_tuple = enumerate_iset_combinations( i, 0, column_multiplicity_fn[i - 1] );
264 uint32_t multiplicity = std::get<2>( ret_tuple );
267 uint32_t additional_cost = ( num_vars - i > ps.lut_size ) ? 128 : 0;
270 if ( multiplicity <= ( 1 << ( ps.lut_size - i ) ) && multiplicity + additional_cost < best_cost && multiplicity <= 16 )
272 best_tt = std::get<0>( ret_tuple );
273 permutations = std::get<1>( ret_tuple );
274 best_multiplicity = multiplicity;
275 best_cost = multiplicity + additional_cost;
278 if ( !ps.use_first && multiplicity > 2 )
294 pst->num_luts = best_multiplicity <= 2 ? 2 : best_multiplicity <= 4 ? 3
295 : best_multiplicity <= 8 ? 4
297 pst->num_edges = ( pst->num_luts - 1 ) * ( num_vars - best_free_set ) + ( pst->num_luts - 1 ) + best_free_set;
303 void init_truth_table(
word* ptt )
305 uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) );
307 for (
uint32_t i = 0; i < num_blocks; ++i )
309 best_tt._bits[i] = ptt[i];
315 template<u
int32_t free_set_size>
316 uint32_t column_multiplicity( STT
const& tt )
318 uint64_t multiplicity_set[4] = { 0u, 0u, 0u, 0u };
320 uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
321 uint64_t
constexpr masks_bits[] = { 0x0, 0x3, 0xF, 0x3F };
322 uint64_t
constexpr masks_idx[] = { 0x0, 0x0, 0x0, 0x3 };
325 static_assert( free_set_size <= 3,
"Wrong free set size for method used, expected le 3" );
328 for (
auto i = 0u; i < num_blocks; ++i )
330 uint64_t cof = tt._bits[i];
331 for (
auto j = 0; j < ( 64 >> free_set_size ); ++j )
333 multiplicity_set[( cof >> 6 ) & masks_idx[free_set_size]] |= UINT64_C( 1 ) << ( cof & masks_bits[free_set_size] );
334 cof >>= ( 1u << free_set_size );
338 multiplicity = __builtin_popcountl( multiplicity_set[0] );
340 if ( free_set_size == 3 )
342 multiplicity += __builtin_popcountl( multiplicity_set[1] );
343 multiplicity += __builtin_popcountl( multiplicity_set[2] );
344 multiplicity += __builtin_popcountl( multiplicity_set[3] );
350 template<u
int32_t free_set_size>
351 uint32_t column_multiplicity5( STT
const& tt )
353 uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
354 uint64_t
constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF };
356 static_assert( free_set_size == 5 || free_set_size == 4,
"Wrong free set size for method used, expected of 4 or 5" );
360 std::array<uint32_t, 64> multiplicity_set;
363 for (
auto i = 0u; i < num_blocks; ++i )
365 uint64_t cof = tt._bits[i];
366 for (
auto j = 0; j < ( 64 >> free_set_size ); ++j )
368 uint64_t fs_fn = cof & masks[free_set_size];
371 multiplicity_set[
size++] =
static_cast<uint32_t>( fs_fn );
374 cof >>= ( 1u << free_set_size );
378 std::sort( multiplicity_set.begin(), multiplicity_set.begin() + size );
382 for (
auto i = 1u; i <
size; ++i )
384 multiplicity += multiplicity_set[i] != multiplicity_set[i - 1] ? 1 : 0;
392 assert( free_set_size <= 5 );
394 uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
395 uint64_t
const shift = UINT64_C( 1 ) << free_set_size;
396 uint64_t
const mask = ( UINT64_C( 1 ) << shift ) - 1;
401 for (
auto i = 0u; i < num_blocks; ++i )
403 uint64_t sub = tt._bits[i];
404 for (
auto j = 0; j < ( 64 >> free_set_size ); ++j )
408 for ( k = 0; k <
size; ++k )
410 if ( fs_fn == cofactors[k] )
416 cofactors[
size++] = fs_fn;
428 for ( i = k - 1; pComb[i] == num_vars - k + i; --i )
436 uint32_t pos_new = pInvPerm[var_old + 1];
437 std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] );
438 std::swap( pComb[i], pComb[pos_new] );
439 swap_inplace_local( tt, i, pos_new );
441 for (
uint32_t j = i + 1; j < k; j++ )
444 pos_new = pInvPerm[pComb[j - 1] + 1];
445 std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] );
446 std::swap( pComb[j], pComb[pos_new] );
447 swap_inplace_local( tt, j, pos_new );
453 template<
typename Fn>
454 std::tuple<STT, std::array<uint32_t, max_num_vars>,
uint32_t> enumerate_iset_combinations(
uint32_t free_set_size,
uint32_t offset, Fn&& fn )
459 STT local_best_tt = tt;
460 uint32_t best_cost = ( 1 << ( ps.lut_size - free_set_size ) ) + 1;
462 assert( free_set_size >= offset );
465 if ( free_set_size == offset )
467 best_cost = fn( tt );
468 return std::make_tuple( tt, permutations, best_cost );
475 if ( free_set_size == ps.lut_size - 1 )
477 return enumerate_iset_combinations2( free_set_size, offset );
481 uint32_t pComb[16], pInvPerm[16], bestPerm[16];
482 for (
uint32_t i = 0; i < num_vars; ++i )
484 pComb[i] = pInvPerm[i] = i;
491 bail_multiplicity = ( best_multiplicity >> 1 ) + ( best_multiplicity & 1 );
498 if ( cost < best_cost )
502 for (
uint32_t i = 0; i < num_vars; ++i )
504 bestPerm[i] = pComb[i];
507 if ( best_cost <= bail_multiplicity )
512 }
while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) );
514 std::array<uint32_t, max_num_vars> res_perm;
516 if ( best_cost > ( 1 << ( ps.lut_size - free_set_size ) ) )
518 return std::make_tuple( local_best_tt, res_perm,
UINT32_MAX );
521 for (
uint32_t i = 0; i < num_vars; ++i )
523 res_perm[i] = permutations[bestPerm[i]];
526 return std::make_tuple( local_best_tt, res_perm, best_cost );
529 inline std::tuple<STT, std::array<uint32_t, max_num_vars>,
uint32_t> enumerate_iset_combinations2(
uint32_t free_set_size,
uint32_t offset )
534 STT local_best_tt = tt;
535 uint32_t best_cost = ( 1 << ( ps.lut_size - free_set_size ) ) + 1;
537 assert( free_set_size >= offset );
541 for (
uint32_t i = 0; i < num_vars; ++i )
543 pComb[i] = pInvPerm[i] = i;
547 std::array<uint32_t, max_num_vars> res_perm;
551 uint32_t cost = column_multiplicity2( tt, free_set_size );
556 for (
uint32_t i = 0; i < num_vars; ++i )
558 res_perm[i] = permutations[pComb[i]];
560 return std::make_tuple( local_best_tt, res_perm, best_cost );
562 }
while ( combinations_offset_next( free_set_size, offset, pComb, pInvPerm, tt ) );
564 return std::make_tuple( local_best_tt, res_perm,
UINT32_MAX );
567 std::vector<STT> compute_isets(
bool verbose =
false )
570 uint32_t isets_support = num_vars - best_free_set;
571 std::vector<STT> isets( best_multiplicity );
574 std::unordered_map<uint64_t, uint32_t> column_to_iset;
577 uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
578 uint64_t
constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF };
580 auto it = std::begin( tt );
581 for (
auto i = 0u; i < num_blocks; ++i )
583 for (
auto j = 0; j < ( 64 >> best_free_set ); ++j )
585 uint64_t val = *it & masks[best_free_set];
587 auto el = column_to_iset.find( val );
588 if ( el != column_to_iset.end() )
590 isets[el->second]._bits[i / ( 1u << best_free_set )] |= UINT64_C( 1 ) << ( j + offset );
594 isets[column_to_iset.size()]._bits[i / ( 1u << best_free_set )] |= UINT64_C( 1 ) << ( j + offset );
595 column_to_iset[val] = column_to_iset.size();
598 *it >>= ( 1u << best_free_set );
601 offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F;
606 for ( STT& iset : isets )
608 local_extend_to( iset, isets_support );
612 std::vector<STT> free_set_tts( best_multiplicity );
614 for (
auto const& pair : column_to_iset )
616 free_set_tts[pair.second]._bits[0] = pair.first;
617 local_extend_to( free_set_tts[pair.second], best_free_set );
623 std::cout <<
"iSets\n";
625 for (
auto iset : isets )
628 std::cout <<
" of func ";
634 best_free_set_tts = std::move( free_set_tts );
639 void generate_decomposition()
644 for (
uint32_t i = 0; i < best_bound_sets.size(); ++i )
646 ac_decomposition_result dec;
647 auto tt = best_bound_sets[i];
648 auto care = best_care_sets[i];
652 for (
uint32_t j = 0; j < num_vars - best_free_set; ++j )
657 adjust_truth_table_on_dc( tt, care, tt.num_vars(), j );
666 dec.support.push_back( permutations[best_free_set + j] );
671 dec_result.push_back( dec );
672 num_edges += dec.support.size() > 1 ? dec.support.size() : 0;
676 compute_top_lut_decomposition();
680 pst->num_luts = dec_result.size();
681 pst->num_edges = num_edges + dec_result.back().support.size();
685 void compute_top_lut_decomposition()
687 uint32_t top_vars = best_bound_sets.size() + best_free_set;
688 assert( top_vars <= ps.lut_size );
691 kitty::dynamic_truth_table tt( top_vars );
694 dec_result.emplace_back();
695 for (
uint32_t i = 0; i < best_free_set; ++i )
697 dec_result.back().support.push_back( permutations[i] );
701 std::vector<kitty::dynamic_truth_table> bound_set_vars;
702 auto res_it = dec_result.begin();
704 for (
uint32_t i = 0; i < best_bound_sets.size(); ++i )
706 bound_set_vars.emplace_back( top_vars );
710 if ( res_it->support.size() == 1 )
712 dec_result.back().support.push_back( res_it->support.front() );
714 if ( ( res_it->tt._bits[0] & 1 ) == 1 )
716 bound_set_vars[i] = ~bound_set_vars[i];
718 dec_result.erase( res_it );
723 dec_result.back().support.push_back( num_vars + i - offset );
729 for (
uint32_t i = 0; i < best_free_set_tts.size(); ++i )
731 kitty::dynamic_truth_table free_set_tt =
kitty::shrink_to( best_free_set_tts[i], top_vars );
734 for (
uint32_t j = 0; j < bound_set_vars.size(); ++j )
737 if ( ( ( best_iset_onset[j] >> i ) & 1 ) )
739 free_set_tt &= bound_set_vars[j];
741 else if ( ( ( best_iset_offset[j] >> i ) & 1 ) )
743 free_set_tt &= ~bound_set_vars[j];
751 dec_result.back().tt = tt;
754 inline void reposition_late_arriving_variables(
unsigned delay_profile,
uint32_t late_arriving )
757 for (
uint32_t i = 0; i < late_arriving; ++i )
759 while ( ( ( delay_profile >> k ) & 1 ) == 0 )
762 if ( permutations[i] == k )
768 std::swap( permutations[i], permutations[k] );
769 swap_inplace_local( best_tt, i, k );
774 template<
class Iterator>
775 void print_perm( Iterator begin,
uint32_t free_set )
778 for (
uint32_t i = 0; i < num_vars; ++i )
784 std::cout << *begin <<
" ";
790 void generate_support_minimization_encodings()
796 if ( __builtin_popcount( best_multiplicity ) == 1 )
798 uint32_t num_combs_exact[4] = { 1, 3, 35, 6435 };
801 if ( ( best_multiplicity >> i ) == 2u )
803 num_combs = num_combs_exact[i];
806 support_minimization_encodings = std::vector<std::array<uint32_t, 2>>( num_combs );
807 generate_support_minimization_encodings_rec<false, true>( 0, 0, 0, count, best_multiplicity >> 1,
true );
808 assert( count == num_combs );
813 int32_t min_set_size = 1;
814 if ( best_multiplicity <= 4 )
816 else if ( best_multiplicity <= 8 )
820 min_set_size = best_multiplicity - min_set_size;
822 if ( best_multiplicity > 8 )
825 uint32_t class_sizes[13] = { 3, 3, 15, 25, 35, 35, 255, 501, 957, 1749, 3003, 4719, 6435 };
826 num_combs = class_sizes[best_multiplicity - 3];
827 support_minimization_encodings = std::vector<std::array<uint32_t, 2>>( num_combs );
828 generate_support_minimization_encodings_rec<false, false>( 0, 0, 0, count, min_set_size,
true );
833 uint32_t class_sizes[13] = { 6, 3, 90, 130, 105, 35, 9330, 23436, 48708, 78474, 91377, 70785, 32175 };
834 num_combs = class_sizes[best_multiplicity - 3];
835 support_minimization_encodings = std::vector<std::array<uint32_t, 2>>( num_combs );
836 generate_support_minimization_encodings_rec<true, false>( 0, 0, 0, count, min_set_size,
true );
839 assert( count == num_combs );
842 template<
bool enable_dcset,
bool equal_size_partition>
845 if ( var == best_multiplicity )
847 if ( equal_size_partition )
850 if ( __builtin_popcount( onset ) != __builtin_popcount( offset ) )
855 else if ( __builtin_popcount( onset ) < min_set_size || __builtin_popcount( offset ) < min_set_size )
861 support_minimization_encodings[count][0] = onset;
862 support_minimization_encodings[count][1] = offset;
870 generate_support_minimization_encodings_rec<enable_dcset, equal_size_partition>( onset, offset, var + 1, count, min_set_size, first );
875 generate_support_minimization_encodings_rec<enable_dcset, equal_size_partition>( onset, offset, var + 1, count, min_set_size,
false );
876 onset &= ~( 1 <<
var );
886 generate_support_minimization_encodings_rec<enable_dcset, equal_size_partition>( onset, offset, var + 1, count, min_set_size,
false );
887 offset &= ~( 1 <<
var );
890 void solve_min_support_exact( std::vector<STT>
const& isets )
892 std::vector<encoding_column> matrix;
893 matrix.reserve( support_minimization_encodings.size() );
894 best_bound_sets.clear();
897 if ( !create_covering_matrix<false>( isets, matrix,
false ) )
903 std::array<uint32_t, 6> solution = covering_solve_exact( matrix );
912 uint32_t num_luts = 1 + solution[5];
914 uint32_t num_edges = best_free_set + solution[5];
915 uint32_t isets_support = num_vars - best_free_set;
916 best_care_sets.clear();
917 best_iset_onset.clear();
918 best_iset_offset.clear();
919 for (
uint32_t i = 0; i < solution[5]; ++i )
924 const uint32_t onset = support_minimization_encodings[matrix[solution[i]].index][0];
925 const uint32_t offset = support_minimization_encodings[matrix[solution[i]].index][1];
926 for (
uint32_t j = 0; j < best_multiplicity; ++j )
928 if ( ( ( onset >> j ) & 1 ) )
932 if ( ( ( offset >> j ) & 1 ) )
939 num_edges += matrix[solution[i]].cost & ( ( 1 << isets_support ) - 1 );
941 best_bound_sets.push_back( tt );
942 best_care_sets.push_back( care );
943 best_iset_onset.push_back( onset );
944 best_iset_offset.push_back( offset );
949 pst->num_luts = num_luts;
950 pst->num_levels = num_levels;
951 pst->num_edges = num_edges;
955 void solve_min_support_heuristic( std::vector<STT>
const& isets )
957 std::vector<encoding_column> matrix;
958 matrix.reserve( support_minimization_encodings.size() );
959 best_bound_sets.clear();
962 if ( !create_covering_matrix<true>( isets, matrix,
true ) )
968 std::array<uint32_t, 6> solution = covering_solve_heuristic( matrix );
977 while ( covering_improve( matrix, solution ) )
981 uint32_t num_luts = 1 + solution[5];
983 uint32_t num_edges = best_free_set + solution[5];
984 uint32_t isets_support = num_vars - best_free_set;
985 best_care_sets.clear();
986 best_iset_onset.clear();
987 best_iset_offset.clear();
988 for (
uint32_t i = 0; i < solution[5]; ++i )
993 const uint32_t onset = support_minimization_encodings[matrix[solution[i]].index][0];
994 const uint32_t offset = support_minimization_encodings[matrix[solution[i]].index][1];
995 for (
uint32_t j = 0; j < best_multiplicity; ++j )
997 if ( ( ( onset >> j ) & 1 ) )
1001 if ( ( ( offset >> j ) & 1 ) )
1008 num_edges += matrix[solution[i]].cost & ( ( 1 << isets_support ) - 1 );
1010 best_bound_sets.push_back( tt );
1011 best_care_sets.push_back( care );
1012 best_iset_onset.push_back( onset );
1013 best_iset_offset.push_back( offset );
1018 pst->num_luts = num_luts;
1019 pst->num_levels = num_levels;
1020 pst->num_edges = num_edges;
1024 template<
bool UseHeuristic>
1025 bool create_covering_matrix( std::vector<STT>
const& isets, std::vector<encoding_column>& matrix,
bool sort )
1027 assert( best_multiplicity <= 16 );
1028 uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2;
1029 uint32_t iset_support = num_vars - best_free_set;
1032 for (
uint32_t i = 0; i < support_minimization_encodings.size(); ++i )
1034 uint32_t const onset = support_minimization_encodings[i][0];
1035 uint32_t const offset = support_minimization_encodings[i][1];
1038 uint64_t column[2] = { 0, 0 };
1042 for (
uint32_t j = 0; j < best_multiplicity; ++j )
1044 auto onset_shift = ( onset >> j );
1045 auto offset_shift = ( offset >> j );
1046 if ( ( onset_shift & 1 ) )
1051 if ( ( offset_shift & 1 ) )
1057 for (
uint32_t k = j + 1; k < best_multiplicity; ++k )
1060 if ( ( ( ( onset_shift & ( offset >> k ) ) | ( ( onset >> k ) & offset_shift ) ) & 1 ) )
1062 column[pair_pointer >> 6u] |= UINT64_C( 1 ) << ( pair_pointer & 0x3F );
1073 for (
uint32_t j = 0; j < iset_support; ++j )
1075 cost += has_var_support( tt, care, iset_support, j ) ? 1 : 0;
1086 if ( cost > ps.lut_size )
1093 float sort_cost = 0;
1096 sort_cost = 1.0f / ( __builtin_popcountl( column[0] ) + __builtin_popcountl( column[1] ) );
1100 sort_cost = cost + ( ( combinations - __builtin_popcountl( column[0] + __builtin_popcountl( column[1] ) ) ) << num_vars );
1104 matrix.emplace_back( encoding_column{ { column[0], column[1] }, cost, i, sort_cost } );
1114 std::sort( matrix.begin(), matrix.end(), [&]( encoding_column
const& a, encoding_column
const& b ) {
1115 return a.cost < b.cost;
1120 std::sort( matrix.begin(), matrix.end(), [&]( encoding_column
const& a, encoding_column
const& b ) {
1121 return a.sort_cost < b.sort_cost;
1128 std::array<uint32_t, 6> covering_solve_exact( std::vector<encoding_column>& matrix )
1131 std::array<uint32_t, 6> res = {
UINT32_MAX };
1133 uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2;
1135 assert( best_multiplicity <= 4 );
1138 if ( best_multiplicity <= 2 )
1143 else if ( best_multiplicity <= 4 )
1146 for (
uint32_t i = 0; i < matrix.size() - 1; ++i )
1148 for (
uint32_t j = 1; j < matrix.size(); ++j )
1151 if ( matrix[i].cost + matrix[j].cost >= best_cost )
1155 if ( __builtin_popcountl( matrix[i].column[0] | matrix[j].column[0] ) + __builtin_popcountl( matrix[i].column[1] | matrix[j].column[1] ) == combinations )
1159 best_cost = matrix[i].cost + matrix[j].cost;
1168 std::array<uint32_t, 6> covering_solve_heuristic( std::vector<encoding_column>& matrix )
1171 std::array<uint32_t, 6> res = {
UINT32_MAX };
1172 uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2;
1173 uint64_t column0 = 0, column1 = 0;
1176 float best_cost = std::numeric_limits<float>::max();
1177 for (
uint32_t i = 0; i < matrix.size(); ++i )
1179 if ( matrix[i].sort_cost < best_cost )
1182 best_cost = matrix[i].sort_cost;
1187 column0 = matrix[best].column[0];
1188 column1 = matrix[best].column[1];
1189 std::swap( matrix[0], matrix[best] );
1194 while ( iter < ps.lut_size - best_free_set && __builtin_popcountl( column0 ) + __builtin_popcountl( column1 ) != combinations )
1198 best_cost = std::numeric_limits<float>::max();
1199 for (
uint32_t i = iter; i < matrix.size(); ++i )
1201 float local_cost = 1.0f / ( __builtin_popcountl( matrix[i].column[0] & ~column0 ) + __builtin_popcountl( matrix[i].column[1] & ~column1 ) );
1202 if ( local_cost < best_cost )
1205 best_cost = local_cost;
1209 column0 |= matrix[best].column[0];
1210 column1 |= matrix[best].column[1];
1211 std::swap( matrix[iter], matrix[best] );
1215 if ( __builtin_popcountl( column0 ) + __builtin_popcountl( column1 ) == combinations )
1217 for (
uint32_t i = 0; i < iter; ++i )
1227 bool covering_improve( std::vector<encoding_column>
const& matrix, std::array<uint32_t, 6>& solution )
1230 uint32_t best_cost = 0, local_cost = 0;
1231 uint32_t num_elements = solution[5];
1232 uint32_t combinations = ( best_multiplicity * ( best_multiplicity - 1 ) ) / 2;
1233 bool improved =
false;
1236 for (
uint32_t i = 0; i < num_elements; ++i )
1238 best_cost += matrix[solution[i]].cost;
1241 uint64_t column0, column1;
1242 for (
uint32_t i = 0; i < num_elements; ++i )
1248 for (
uint32_t j = 0; j < num_elements; ++j )
1252 local_cost += matrix[solution[j]].cost;
1253 column0 |= matrix[solution[j]].column[0];
1254 column1 |= matrix[solution[j]].column[1];
1258 for (
uint32_t j = 0; j < matrix.size(); ++j )
1260 if ( __builtin_popcount( column0 | matrix[j].column[0] ) + __builtin_popcount( column1 | matrix[j].column[1] ) != combinations )
1262 if ( local_cost + matrix[j].cost < best_cost )
1265 best_cost = local_cost + matrix[j].cost;
1274 void adjust_truth_table_on_dc( STT& tt, STT& care,
uint32_t real_num_vars,
uint32_t var_index )
1276 assert( var_index < real_num_vars );
1277 assert( tt.num_vars() == care.num_vars() );
1279 const uint32_t num_blocks = real_num_vars <= 6 ? 1 : ( 1 << ( real_num_vars - 6 ) );
1280 if ( real_num_vars <= 6 || var_index < 6 )
1282 auto it_tt = std::begin( tt._bits );
1283 auto it_care = std::begin( care._bits );
1284 while ( it_tt != std::begin( tt._bits ) + num_blocks )
1286 uint64_t new_bits = *it_tt & *it_care;
1287 *it_tt = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) |
1288 ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] );
1289 *it_care = ( *it_care | ( *it_care >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index];
1290 *it_care = *it_care | ( *it_care << ( uint64_t( 1 ) << var_index ) );
1298 const auto step = 1 << ( var_index - 6 );
1299 for (
auto i = 0u; i < static_cast<uint32_t>( num_blocks ); i += 2 * step )
1301 for (
auto j = 0; j < step; ++j )
1303 tt._bits[i + j] = ( tt._bits[i + j] & care._bits[i + j] ) | ( tt._bits[i + j + step] & care._bits[i + j + step] );
1304 tt._bits[i + j + step] = tt._bits[i + j];
1305 care._bits[i + j] = care._bits[i + j] | care._bits[i + j + step];
1306 care._bits[i + j + step] = care._bits[i + j];
1311 void local_extend_to( STT& tt,
uint32_t real_num_vars )
1313 if ( real_num_vars < 6 )
1315 auto mask = *tt.begin();
1317 for (
auto i = real_num_vars; i < num_vars; ++i )
1319 mask |= ( mask << ( 1 << i ) );
1322 std::fill( tt.begin(), tt.end(), mask );
1326 uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) );
1327 auto it = tt.begin();
1328 while ( it != tt.end() )
1330 it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it );
1335 bool has_var_support(
const STT& tt,
const STT& care,
uint32_t real_num_vars,
uint8_t var_index )
1337 assert( var_index < real_num_vars );
1338 assert( real_num_vars <= tt.num_vars() );
1339 assert( tt.num_vars() == care.num_vars() );
1341 const uint32_t num_blocks = real_num_vars <= 6 ? 1 : ( 1 << ( real_num_vars - 6 ) );
1342 if ( real_num_vars <= 6 || var_index < 6 )
1344 auto it_tt = std::begin( tt._bits );
1345 auto it_care = std::begin( care._bits );
1346 while ( it_tt != std::begin( tt._bits ) + num_blocks )
1348 if ( ( ( ( *it_tt >> ( uint64_t( 1 ) << var_index ) ) ^ *it_tt ) & kitty::detail::projections_neg[var_index] & ( *it_care >> ( uint64_t( 1 ) << var_index ) ) & *it_care ) != 0 )
1359 const auto step = 1 << ( var_index - 6 );
1360 for (
auto i = 0u; i < num_blocks; i += 2 * step )
1362 for (
auto j = 0; j < step; ++j )
1364 if ( ( ( tt._bits[i + j] ^ tt._bits[i + j + step] ) & care._bits[i + j] & care._bits[i + j + step] ) != 0 )
1374 void swap_inplace_local( STT& tt,
uint8_t var_index1,
uint8_t var_index2 )
1376 if ( var_index1 == var_index2 )
1381 if ( var_index1 > var_index2 )
1383 std::swap( var_index1, var_index2 );
1386 const uint32_t num_blocks = num_vars <= 6 ? 1 : 1 << ( num_vars - 6 );
1388 if ( num_vars <= 6 )
1390 const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2];
1391 const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 );
1392 tt._bits[0] = ( tt._bits[0] & pmask[0] ) | ( ( tt._bits[0] & pmask[1] ) << shift ) | ( ( tt._bits[0] & pmask[2] ) >> shift );
1394 else if ( var_index2 <= 5 )
1396 const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2];
1397 const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 );
1398 std::transform( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, std::begin( tt._bits ),
1399 [shift, &pmask]( uint64_t
word ) {
1400 return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift );
1403 else if ( var_index1 <= 5 )
1405 const auto step = 1 << ( var_index2 - 6 );
1406 const auto shift = 1 << var_index1;
1407 auto it = std::begin( tt._bits );
1408 while ( it != std::begin( tt._bits ) + num_blocks )
1410 for (
auto i =
decltype( step ){ 0 }; i < step; ++i )
1412 const auto low_to_high = ( *( it + i ) & kitty::detail::projections[var_index1] ) >> shift;
1413 const auto high_to_low = ( *( it + i + step ) << shift ) & kitty::detail::projections[var_index1];
1414 *( it + i ) = ( *( it + i ) & ~kitty::detail::projections[var_index1] ) | high_to_low;
1415 *( it + i + step ) = ( *( it + i + step ) & kitty::detail::projections[var_index1] ) | low_to_high;
1422 const auto step1 = 1 << ( var_index1 - 6 );
1423 const auto step2 = 1 << ( var_index2 - 6 );
1424 auto it = std::begin( tt._bits );
1425 while ( it != std::begin( tt._bits ) + num_blocks )
1427 for (
auto i = 0; i < step2; i += 2 * step1 )
1429 for (
auto j = 0; j < step1; ++j )
1431 std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) );
1452 void get_decomposition_abc(
unsigned char* decompArray )
1454 unsigned char* pArray = decompArray;
1455 unsigned char bytes = 2;
1459 *pArray++ = dec_result.size();
1462 for ( ac_decomposition_result
const& lut : dec_result )
1465 *pArray++ = lut.support.size();
1471 *pArray++ = (
unsigned char)i;
1476 uint32_t tt_num_bytes = ( lut.tt.num_vars() <= 3 ) ? 1 : ( 1 << ( lut.tt.num_vars() - 3 ) );
1477 tt_num_bytes = std::min( tt_num_bytes, 8u );
1478 for (
uint32_t i = 0; i < lut.tt.num_blocks(); ++i )
1480 for (
uint32_t j = 0; j < tt_num_bytes; ++j )
1482 *pArray++ = (
unsigned char)( ( lut.tt._bits[i] >> ( 8 * j ) ) & 0xFF );
1489 *decompArray = bytes;
1496 std::vector<STT> best_bound_sets;
1497 std::vector<STT> best_care_sets;
1498 std::vector<STT> best_free_set_tts;
1499 std::vector<uint64_t> best_iset_onset;
1500 std::vector<uint64_t> best_iset_offset;
1501 std::vector<ac_decomposition_result> dec_result;
1503 std::vector<std::array<uint32_t, 2>> support_minimization_encodings;
1506 ac_decomposition_params ps;
1507 ac_decomposition_stats* pst;
1508 std::array<uint32_t, max_num_vars> permutations;