55 static constexpr uint32_t max_num_vars = 11;
60 explicit acd66_impl(
uint32_t const num_vars,
bool multiple_shared_set =
false,
bool const verify =
false )
61 : num_vars( num_vars ), multiple_ss( multiple_shared_set ), verify( verify )
63 std::iota( permutations.begin(), permutations.end(), 0 );
72 if ( num_vars > max_num_vars || num_vars > 11 )
78 init_truth_table( ptt );
81 return find_decomposition();
85 int run(
word* ptt,
unsigned delay_profile )
90 if ( num_vars > max_num_vars || num_vars > 11 )
95 uint32_t late_arriving = __builtin_popcount( delay_profile );
98 if ( late_arriving > 5 )
102 init_truth_table( ptt );
106 reposition_late_arriving_variables( delay_profile, late_arriving );
109 return find_decomposition_offset( late_arriving ) ? ( delay_profile == 0 ? 2 : 1 ) : 0;
117 compute_decomposition_impl();
119 if ( verify && !verify_impl() )
131 return num_vars + 1 + num_shared_vars;
135 return bs_support_size + best_free_set + 1 + num_shared_vars;
141 unsigned profile = 0;
148 for (
uint32_t i = 0; i < best_free_set; ++i )
150 profile |= 1 << permutations[i];
155 for (
uint32_t i = 0; i < bs_support_size; ++i )
157 profile |= 1 << permutations[bs_support[i] + best_free_set];
159 profile = ~profile & ( ( 1u << num_vars ) - 1 );
170 get_decomposition_abc( decompArray );
174 bool find_decomposition()
182 return find_decomposition_bs_multi_ss( num_vars - 6 );
185 uint32_t max_free_set = num_vars == 11 ? 5 : 4;
188 for (
uint32_t i = num_vars - 6; i <= max_free_set; ++i )
190 if ( find_decomposition_bs( i ) )
198 bool find_decomposition_offset(
uint32_t offset )
206 return find_decomposition_bs_offset_multi_ss( std::max( num_vars - 6, offset ), offset );
209 uint32_t max_free_set = ( num_vars == 11 || offset == 5 ) ? 5 : 4;
212 for (
uint32_t i = std::max( num_vars - 6, offset ); i <= max_free_set; ++i )
214 if ( find_decomposition_bs_offset( i, offset ) )
222 void init_truth_table(
word* ptt )
224 uint32_t const num_blocks = ( num_vars <= 6 ) ? 1 : ( 1 << ( num_vars - 6 ) );
226 for (
uint32_t i = 0; i < num_blocks; ++i )
228 start_tt._bits[i] = ptt[i];
231 local_extend_to( start_tt, num_vars );
236 assert( free_set_size <= 5 );
238 uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
239 uint64_t
const shift = UINT64_C( 1 ) << free_set_size;
240 uint64_t
const mask = ( UINT64_C( 1 ) << shift ) - 1;
241 uint32_t const limit = free_set_size < 5 ? 4 : 2;
246 for (
auto i = 0u; i < num_blocks; ++i )
248 uint64_t sub = tt._bits[i];
249 for (
auto j = 0; j < ( 64 >> free_set_size ); ++j )
253 for ( k = 0; k <
size; ++k )
255 if ( fs_fn == cofactors[k] )
261 cofactors[
size++] = fs_fn;
271 assert( free_set_size <= 5 );
273 uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
274 uint64_t
const shift = UINT64_C( 1 ) << free_set_size;
275 uint64_t
const mask = ( UINT64_C( 1 ) << shift ) - 1;
280 for (
auto i = 0u; i < num_blocks; ++i )
282 uint64_t sub = tt._bits[i];
283 for (
auto j = 0; j < ( 64 >> free_set_size ); ++j )
287 for ( k = 0; k <
size; ++k )
289 if ( fs_fn == cofactors[k] )
295 cofactors[
size++] = fs_fn;
307 for ( i = k - 1; pComb[i] == num_vars - k + i; --i )
315 uint32_t pos_new = pInvPerm[var_old + 1];
316 std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] );
317 std::swap( pComb[i], pComb[pos_new] );
318 swap_inplace_local( tt, i, pos_new );
320 for (
uint32_t j = i + 1; j < k; j++ )
323 pos_new = pInvPerm[pComb[j - 1] + 1];
324 std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] );
325 std::swap( pComb[j], pComb[pos_new] );
326 swap_inplace_local( tt, j, pos_new );
336 for ( i = k - 1; pComb[i] ==
size - k + i; --i )
344 uint32_t pos_new = pInvPerm[var_old + 1];
345 std::swap( pInvPerm[var_old + 1], pInvPerm[var_old] );
346 std::swap( pComb[i], pComb[pos_new] );
348 for (
uint32_t j = i + 1; j < k; j++ )
351 pos_new = pInvPerm[pComb[j - 1] + 1];
352 std::swap( pInvPerm[pComb[j - 1] + 1], pInvPerm[var_old] );
353 std::swap( pComb[j], pComb[pos_new] );
359 bool find_decomposition_bs(
uint32_t free_set_size )
368 for (
uint32_t i = 0; i < num_vars; ++i )
370 pComb[i] = pInvPerm[i] = i;
374 best_free_set = free_set_size;
377 uint32_t cost = column_multiplicity( tt, free_set_size );
381 best_multiplicity = cost;
382 for (
uint32_t i = 0; i < num_vars; ++i )
384 permutations[i] = pComb[i];
388 else if ( cost <= 4 && free_set_size < 5 )
391 best_multiplicity = cost;
392 int res = check_shared_set( tt );
397 for (
uint32_t i = 0; i < num_vars; ++i )
399 permutations[i] = pComb[i];
402 swap_inplace_local( best_tt, res, num_vars - 1 );
403 std::swap( permutations[res], permutations[num_vars - 1] );
408 }
while ( combinations_next( free_set_size, 0, pComb, pInvPerm, tt ) );
413 bool find_decomposition_bs_offset(
uint32_t free_set_size,
uint32_t offset )
419 best_free_set = free_set_size;
422 if ( free_set_size == offset )
424 uint32_t cost = column_multiplicity( tt, free_set_size );
428 best_multiplicity = cost;
431 else if ( cost <= 4 && free_set_size < 5 )
434 best_multiplicity = cost;
435 int res = check_shared_set( tt );
441 swap_inplace_local( best_tt, res, num_vars - 1 );
442 std::swap( permutations[res], permutations[num_vars - 1] );
452 for (
uint32_t i = 0; i < num_vars; ++i )
454 pComb[i] = pInvPerm[i] = i;
460 uint32_t cost = column_multiplicity( tt, free_set_size );
464 best_multiplicity = cost;
465 for (
uint32_t i = 0; i < num_vars; ++i )
467 pInvPerm[i] = permutations[pComb[i]];
469 for (
uint32_t i = 0; i < num_vars; ++i )
471 permutations[i] = pInvPerm[i];
475 else if ( cost <= 4 && free_set_size < 5 )
478 best_multiplicity = cost;
479 int res = check_shared_set( tt );
484 for (
uint32_t i = 0; i < num_vars; ++i )
486 pInvPerm[i] = permutations[pComb[i]];
488 for (
uint32_t i = 0; i < num_vars; ++i )
490 permutations[i] = pInvPerm[i];
493 swap_inplace_local( best_tt, res, num_vars - 1 );
494 std::swap( permutations[res], permutations[num_vars - 1] );
499 }
while ( combinations_next( free_set_size, offset, pComb, pInvPerm, tt ) );
504 bool find_decomposition_bs_multi_ss(
uint32_t free_set_size )
512 uint32_t pComb[16], pInvPerm[16], shared_set[4];
513 for (
uint32_t i = 0; i < num_vars; ++i )
515 pComb[i] = pInvPerm[i] = i;
519 best_free_set = free_set_size;
522 uint32_t cost = column_multiplicity2( tt, free_set_size, 1 << ( 6 - free_set_size ) );
526 best_multiplicity = cost;
527 for (
uint32_t i = 0; i < num_vars; ++i )
529 permutations[i] = pComb[i];
534 uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2
538 if ( ss_vars_needed + free_set_size < 6 )
541 best_multiplicity = cost;
542 int res = check_shared_set_multi( tt, ss_vars_needed, shared_set );
547 for (
uint32_t i = 0; i < num_vars; ++i )
549 permutations[i] = pComb[i];
552 for ( int32_t i = res - 1; i >= 0; --i )
554 swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i );
555 std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] );
557 num_shared_vars = res;
561 }
while ( combinations_next( free_set_size, 0, pComb, pInvPerm, tt ) );
567 bool find_decomposition_bs_offset_multi_ss(
uint32_t free_set_size,
uint32_t offset )
573 best_free_set = free_set_size;
577 if ( free_set_size == offset )
579 uint32_t cost = column_multiplicity2( tt, free_set_size, 1 << ( 6 - free_set_size ) );
583 best_multiplicity = cost;
587 uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2
592 if ( ss_vars_needed + free_set_size < 6 )
595 best_multiplicity = cost;
596 int res = check_shared_set_multi( tt, ss_vars_needed, shared_set );
602 for ( int32_t i = res - 1; i >= 0; --i )
604 swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i );
605 std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] );
607 num_shared_vars = res;
618 for (
uint32_t i = 0; i < num_vars; ++i )
620 pComb[i] = pInvPerm[i] = i;
626 uint32_t cost = column_multiplicity2( tt, free_set_size, 1 << ( 6 - free_set_size ) );
630 best_multiplicity = cost;
631 for (
uint32_t i = 0; i < num_vars; ++i )
633 pInvPerm[i] = permutations[pComb[i]];
635 for (
uint32_t i = 0; i < num_vars; ++i )
637 permutations[i] = pInvPerm[i];
642 uint32_t ss_vars_needed = cost <= 4 ? 1 : cost <= 8 ? 2
647 if ( ss_vars_needed + free_set_size < 6 )
650 best_multiplicity = cost;
651 int res = check_shared_set_multi( tt, ss_vars_needed, shared_set );
656 for (
uint32_t i = 0; i < num_vars; ++i )
658 pInvPerm[i] = permutations[pComb[i]];
660 for (
uint32_t i = 0; i < num_vars; ++i )
662 permutations[i] = pInvPerm[i];
665 for ( int32_t i = res - 1; i >= 0; --i )
667 swap_inplace_local( best_tt, shared_set[i] + best_free_set, num_vars - res + i );
668 std::swap( permutations[shared_set[i] + best_free_set], permutations[num_vars - res + i] );
670 num_shared_vars = res;
674 }
while ( combinations_next( free_set_size, offset, pComb, pInvPerm, tt ) );
680 bool check_shared_var( STT
const& tt,
uint32_t free_set_size,
uint32_t shared_var )
682 assert( free_set_size <= 5 );
684 uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
685 uint64_t
const shift = UINT64_C( 1 ) << free_set_size;
686 uint64_t
const mask = ( UINT64_C( 1 ) << shift ) - 1;
689 uint32_t shared_var_shift = shared_var - free_set_size;
693 for (
auto i = 0u; i < num_blocks; ++i )
695 uint64_t sub = tt._bits[i];
696 for (
auto j = 0; j < ( 64 >> free_set_size ); ++j )
699 uint32_t p = ( iteration_counter >> shared_var_shift ) & 1;
701 for ( k = 0; k <
size[
p]; ++k )
703 if ( fs_fn == cofactors[
p][k] )
709 cofactors[
p][
size[
p]++] = fs_fn;
718 inline int check_shared_set( STT
const& tt )
721 for (
uint32_t i = best_free_set; i < num_vars; ++i )
724 if ( check_shared_var( tt, best_free_set, i ) )
733 bool check_shared_var_combined( STT
const& tt,
uint32_t free_set_size,
uint32_t shared_vars[6],
uint32_t num_shared_vars )
735 assert( free_set_size <= 5 );
736 assert( num_shared_vars <= 4 );
738 uint32_t const num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
739 uint64_t
const shift = UINT64_C( 1 ) << free_set_size;
740 uint64_t
const mask = ( UINT64_C( 1 ) << shift ) - 1;
746 for (
auto i = 0u; i < num_blocks; ++i )
748 uint64_t sub = tt._bits[i];
749 for (
auto j = 0; j < ( 64 >> free_set_size ); ++j )
753 for (
uint32_t k = 0; k < num_shared_vars; ++k )
755 p |= ( ( iteration_counter >> shared_vars[k] ) & 1 ) << k;
759 for ( k = 0; k <
size[
p]; ++k )
761 if ( fs_fn == cofactors[
p][k] )
767 cofactors[
p][
size[
p]++] = fs_fn;
776 inline int check_shared_set_multi( STT
const& tt,
uint32_t target_num_ss,
uint32_t* res_shared )
782 for (
uint32_t i = target_num_ss; i < 6 - best_free_set; ++i )
786 pComb[i] = pInvPerm[i] = i;
792 if ( check_shared_var_combined( tt, best_free_set, pComb, i ) )
796 res_shared[j] = pComb[j];
799 std::sort( res_shared, res_shared + i );
802 }
while ( combinations_next_simple( i, pComb, pInvPerm, num_vars - best_free_set ) );
808 void compute_decomposition_impl(
bool verbose =
false )
817 uint32_t num_blocks = ( num_vars > 6 ) ? ( 1u << ( num_vars - 6 ) ) : 1;
818 uint64_t
const shift = UINT64_C( 1 ) << best_free_set;
819 uint64_t
const mask = ( UINT64_C( 1 ) << shift ) - 1;
820 uint32_t const num_groups = 1 << num_shared_vars;
821 uint32_t const next_group = 1 << ( num_vars - best_free_set - num_shared_vars );
823 uint64_t fs_fun[32] = { 0 };
824 uint64_t dc_mask = ( ( UINT64_C( 1 ) << next_group ) - 1 );
828 fs_fun[0] = best_tt._bits[0] & mask;
830 for (
auto i = 0u; i < num_blocks; ++i )
832 uint64_t cof = best_tt._bits[i];
833 for (
auto j = 0; j < ( 64 >> best_free_set ); ++j )
835 uint64_t val = cof & mask;
837 if ( set_index == next_group )
842 fs_fun[group_index + 1] = fs_fun[group_index];
843 bs_dc._bits |= dc_mask;
849 fs_fun[group_index] = val;
850 dc_mask <<= next_group;
853 if ( val != fs_fun[group_index] )
855 bs._bits |= UINT64_C( 1 ) << ( j + offset );
856 fs_fun[group_index + 1] = val;
862 offset = ( offset + ( 64 >> best_free_set ) ) & 0x3F;
868 fs_fun[group_index + 1] = fs_fun[group_index];
869 bs_dc._bits |= dc_mask;
873 for (
uint32_t i = 0; i < 2 * num_groups; ++i )
875 composition._bits |= fs_fun[i] << ( i * shift );
881 uint64_t
constexpr masks[] = { 0x0, 0x3, 0xF, 0xFF, 0xFFFF, 0xFFFFFFFF, UINT64_MAX };
882 care._bits = masks[num_vars - best_free_set] & ~bs_dc._bits;
883 for (
uint32_t i = 0; i < num_vars - best_free_set; ++i )
885 if ( !has_var6( bs, care, i ) )
887 adjust_truth_table_on_dc( bs, care, i );
891 if ( bs_support_size < i )
896 bs_support[bs_support_size] = i;
901 dec_funcs[0] = bs._bits;
902 dec_funcs[1] = composition._bits;
908 f._bits = dec_funcs[0];
909 std::cout <<
"BS function : ";
912 f._bits = dec_funcs[1];
913 std::cout <<
"Composition function: ";
919 template<
typename TT_type>
920 void local_extend_to( TT_type& tt,
uint32_t real_num_vars )
922 if ( real_num_vars < 6 )
924 auto mask = *tt.begin();
926 for (
auto i = real_num_vars; i < num_vars; ++i )
928 mask |= ( mask << ( 1 << i ) );
931 std::fill( tt.begin(), tt.end(), mask );
935 uint32_t num_blocks = ( 1u << ( real_num_vars - 6 ) );
936 auto it = tt.begin();
937 while ( it != tt.end() )
939 it = std::copy( tt.cbegin(), tt.cbegin() + num_blocks, it );
944 inline void reposition_late_arriving_variables(
unsigned delay_profile,
uint32_t late_arriving )
947 for (
uint32_t i = 0; i < late_arriving; ++i )
949 while ( ( ( delay_profile >> k ) & 1 ) == 0 )
952 if ( permutations[i] == k )
958 std::swap( permutations[i], permutations[k] );
959 swap_inplace_local( best_tt, i, k );
964 void swap_inplace_local( STT& tt,
uint8_t var_index1,
uint8_t var_index2 )
966 if ( var_index1 == var_index2 )
971 if ( var_index1 > var_index2 )
973 std::swap( var_index1, var_index2 );
977 const uint32_t num_blocks = 1 << ( num_vars - 6 );
979 if ( var_index2 <= 5 )
981 const auto& pmask = kitty::detail::ppermutation_masks[var_index1][var_index2];
982 const auto shift = ( 1 << var_index2 ) - ( 1 << var_index1 );
983 std::transform( std::begin( tt._bits ), std::begin( tt._bits ) + num_blocks, std::begin( tt._bits ),
984 [shift, &pmask]( uint64_t
word ) {
985 return ( word & pmask[0] ) | ( ( word & pmask[1] ) << shift ) | ( ( word & pmask[2] ) >> shift );
988 else if ( var_index1 <= 5 )
990 const auto step = 1 << ( var_index2 - 6 );
991 const auto shift = 1 << var_index1;
992 auto it = std::begin( tt._bits );
993 while ( it != std::begin( tt._bits ) + num_blocks )
995 for (
auto i =
decltype( step ){ 0 }; i < step; ++i )
997 const auto low_to_high = ( *( it + i ) & kitty::detail::projections[var_index1] ) >> shift;
998 const auto high_to_low = ( *( it + i + step ) << shift ) & kitty::detail::projections[var_index1];
999 *( it + i ) = ( *( it + i ) & ~kitty::detail::projections[var_index1] ) | high_to_low;
1000 *( it + i + step ) = ( *( it + i + step ) & kitty::detail::projections[var_index1] ) | low_to_high;
1007 const auto step1 = 1 << ( var_index1 - 6 );
1008 const auto step2 = 1 << ( var_index2 - 6 );
1009 auto it = std::begin( tt._bits );
1010 while ( it != std::begin( tt._bits ) + num_blocks )
1012 for (
auto i = 0; i < step2; i += 2 * step1 )
1014 for (
auto j = 0; j < step1; ++j )
1016 std::swap( *( it + i + j + step1 ), *( it + i + j + step2 ) );
1024 inline bool has_var6(
const LTT& tt,
const LTT& care,
uint8_t var_index )
1026 if ( ( ( ( tt._bits >> ( uint64_t( 1 ) << var_index ) ) ^ tt._bits ) & kitty::detail::projections_neg[var_index] & ( care._bits >> ( uint64_t( 1 ) << var_index ) ) & care._bits ) != 0 )
1034 void adjust_truth_table_on_dc( LTT& tt, LTT& care,
uint32_t var_index )
1036 uint64_t new_bits = tt._bits & care._bits;
1037 tt._bits = ( ( new_bits | ( new_bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index] ) |
1038 ( ( new_bits | ( new_bits << ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections[var_index] );
1039 care._bits = ( care._bits | ( care._bits >> ( uint64_t( 1 ) << var_index ) ) ) & kitty::detail::projections_neg[var_index];
1040 care._bits = care._bits | ( care._bits << ( uint64_t( 1 ) << var_index ) );
1056 void get_decomposition_abc(
unsigned char* decompArray )
1058 unsigned char* pArray = decompArray;
1059 unsigned char bytes = 2;
1068 *pArray = bs_support_size;
1073 for (
uint32_t i = 0; i < bs_support_size; ++i )
1075 *pArray = (
unsigned char)permutations[bs_support[i] + best_free_set];
1081 uint32_t tt_num_bytes = ( bs_support_size <= 3 ) ? 1 : ( 1 << ( bs_support_size - 3 ) );
1082 for (
uint32_t i = 0; i < tt_num_bytes; ++i )
1084 *pArray = (
unsigned char)( ( dec_funcs[0] >> ( 8 * i ) ) & 0xFF );
1091 uint32_t support_size = best_free_set + 1 + num_shared_vars;
1092 *pArray = support_size;
1097 for (
uint32_t i = 0; i < best_free_set; ++i )
1099 *pArray = (
unsigned char)permutations[i];
1104 *pArray = (
unsigned char)num_vars;
1108 for (
uint32_t i = 0; i < num_shared_vars; ++i )
1110 *pArray = (
unsigned char)permutations[num_vars - num_shared_vars + i];
1116 tt_num_bytes = ( support_size <= 3 ) ? 1 : ( 1 << ( support_size - 3 ) );
1117 for (
uint32_t i = 0; i < tt_num_bytes; ++i )
1119 *pArray = (
unsigned char)( ( dec_funcs[1] >> ( 8 * i ) ) & 0xFF );
1125 *decompArray = bytes;
1131 STT pis[max_num_vars];
1132 for (
uint32_t i = 0; i < num_vars; ++i )
1139 for (
uint32_t i = 0; i < bs_support_size; ++i )
1141 bsi[i] = pis[best_free_set + bs_support[i]];
1146 for (
uint32_t i = 0u; i < ( 1 << num_vars ); ++i )
1149 for (
auto j = 0; j < bs_support_size; ++j )
1151 pattern |= get_bit( bsi[j], i ) << j;
1153 if ( ( dec_funcs[0] >> pattern ) & 1 )
1155 set_bit( bsf_sim, i );
1161 for (
uint32_t i = 0u; i < ( 1 << num_vars ); ++i )
1164 for (
auto j = 0; j < best_free_set; ++j )
1166 pattern |= get_bit( pis[j], i ) << j;
1168 pattern |= get_bit( bsf_sim, i ) << best_free_set;
1171 for (
auto j = 0u; j < num_shared_vars; ++j )
1173 pattern |= get_bit( pis[num_vars - num_shared_vars + j], i ) << ( best_free_set + 1 + j );
1176 if ( ( dec_funcs[1] >> pattern ) & 1 )
1178 set_bit( top_sim, i );
1182 for (
uint32_t i = 0; i < ( 1 << ( num_vars - 6 ) ); ++i )
1184 if ( top_sim._bits[i] != start_tt._bits[i] )
1188 std::cout <<
"Found incorrect decomposition\n";
1189 report_tt( top_sim );
1190 std::cout <<
" instead_of\n";
1191 report_tt( start_tt );
1199 uint32_t get_bit(
const STT& tt, uint64_t index )
1201 return ( tt._bits[index >> 6] >> ( index & 0x3f ) ) & 0x1;
1204 void set_bit( STT& tt, uint64_t index )
1206 tt._bits[index >> 6] |= uint64_t( 1 ) << ( index & 0x3f );
1209 void report_tt( STT
const& stt )
1211 kitty::dynamic_truth_table tt( num_vars );
1213 std::copy( std::begin( stt._bits ), std::begin( stt._bits ) + ( 1 << ( num_vars - 6 ) ), std::begin( tt ) );
1225 uint64_t dec_funcs[2];
1229 bool const multiple_ss;
1231 std::array<uint32_t, max_num_vars> permutations;