23#define QPS_EPS (QPS_TOL * QPS_TOL)
25#define QPS_MAX_TOL 0.1
26#define QPS_LOOP_TOL 1.0e-3
28#define QPS_RELAX_ITER 180
29#define QPS_MAX_ITER 200
30#define QPS_STEPSIZE_RETRIES 2
31#define QPS_MINSTEP 1.0e-6
32#define QPS_DEC_CHANGE 0.01
35#define QPS_PRECON_EPS 1.0e-9
40#define QPS_DEBUG_FILE "/tmp/qps_debug.log"
85 for (i =
p->num_cells; i--;) {
89 tp[i * 2 + 1] = cp[t + 1];
93 tp[i * 2 + 1] =
p->y[i];
97 for (i =
p->num_cells; i--;) {
104 while ((u =
p->cog_list[pr++]) >= 0) {
107 rx -=
p->area[u] * tp[u * 2];
108 ry -=
p->area[u] * tp[u * 2 + 1];
111 rx +=
p->cog_x[t] * ta;
112 ry +=
p->cog_y[t] * ta;
113 tp[i * 2] = rx /
p->area[i];
114 tp[i * 2 + 1] = ry /
p->area[i];
119 fprintf(
p->priv_fp,
"### qps_settp()\n");
120 for (i = 0; i <
p->num_cells; i++) {
121 fprintf(
p->priv_fp,
"%f %f\n", tp[i * 2], tp[i * 2 + 1]);
140#if !defined(QPS_HOIST)
151 for (j = 0; j <
p->num_cells; j++) {
154 while ((k =
p->priv_cc[pr]) >= 0) {
157 ty = tp[k * 2 + 1] - jy;
158 f += w * (tx * tx + ty * ty);
165#if !defined(QPS_HOIST)
168 for (i = 0; i <
p->loop_num; i++) {
170 j = st =
p->loop_list[pr++];
172 jy = sy = tp[j * 2 + 1];
173 while ((k =
p->loop_list[pr]) >= 0) {
178 t += tx * tx + ty * ty;
186 t += tx * tx + ty * ty;
189 fprintf(
p->priv_fp,
"### qps_penalty() %d %f %f\n",
190 i,
p->loop_max[i], t);
193 f +=
p->loop_penalty[i] * t;
199 for (j =
p->num_cells; j--;) {
200 f +=
p->priv_mxl[j] * (-tp[j * 2]);
201 f +=
p->priv_mxh[j] * (tp[j * 2] -
p->max_x);
202 f +=
p->priv_myl[j] * (-tp[j * 2 + 1]);
203 f +=
p->priv_myh[j] * (tp[j * 2 + 1] -
p->max_y);
208 fprintf(
p->priv_fp,
"### qps_func() %f %f\n", f,
p->f);
228#if !defined(QPS_HOIST)
237 for (i =
p->num_cells; i--;) {
239 tp2[i * 2 + 1] = 0.0;
241 for (j = 0; j <
p->num_cells; j++) {
244 while ((k =
p->priv_cc[pr]) >= 0) {
245 w = 2.0 *
p->priv_cw[pr];
252 tp2[j * 2 + 1] += ty;
253 tp2[k * 2 + 1] -= ty;
259#if !defined(QPS_HOIST)
262 for (i = 0; i <
p->loop_num; i++) {
263 j = st =
p->loop_list[pr++];
265 jy = sy = tp[j * 2 + 1];
266 w = 2.0 *
p->loop_penalty[i];
267 while ((k =
p->loop_list[pr]) >= 0) {
274 tp2[j * 2 + 1] += ty;
275 tp2[k * 2 + 1] -= ty;
285 tp2[j * 2 + 1] += ty;
286 tp2[st * 2 + 1] -= ty;
292 for (j =
p->num_cells; j--;) {
293 tp2[j * 2] +=
p->priv_mxh[j] -
p->priv_mxl[j];
294 tp2[j * 2 + 1] +=
p->priv_myh[j] -
p->priv_myl[j];
299 fprintf(
p->priv_fp,
"### qps_dfunc() partials\n");
300 for (j = 0; j <
p->num_cells; j++) {
301 fprintf(
p->priv_fp,
"%f %f\n", tp2[j * 2], tp2[j * 2 + 1]);
306 for (j =
p->priv_n; j--;) {
309 for (j =
p->num_cells; j--;) {
313 d[ji + 1] += tp2[j * 2 + 1];
318 while ((k =
p->cog_list[pr]) >= 0) {
323 assert(fabs(w -
p->area[k] /
p->area[j]) < 1.0e-6);
325 d[ki] -= tp2[j * 2] * w;
326 d[ki + 1] -= tp2[j * 2 + 1] * w;
334 fprintf(
p->priv_fp,
"### qps_dfunc() gradient\n");
335 for (j = 0; j <
p->priv_n; j++) {
336 fprintf(
p->priv_fp,
"%f\n", d[j]);
360#if !defined(QPS_HOIST)
369 for (i =
p->num_cells; i--;) {
373 for (j =
p->num_cells; j--;) {
377 tp[j * 2 + 1] = h[ji + 1];
382 while ((k =
p->cog_list[pr]) >= 0) {
387 assert(fabs(w -
p->area[k] /
p->area[j]) < 1.0e-6);
389 tp[j * 2] -= h[ki] * w;
390 tp[j * 2 + 1] -= h[ki + 1] * w;
399 for (j = 0; j <
p->num_cells; j++) {
402 while ((k =
p->priv_cc[pr]) >= 0) {
405 ky = tp[k * 2 + 1] - jy;
406 f += w * (kx * kx + ky * ky);
412#if !defined(QPS_HOIST)
415 for (i = 0; i <
p->loop_num; i++) {
417 j = st =
p->loop_list[pr++];
419 jy = sy = tp[j * 2 + 1];
420 while ((k =
p->loop_list[pr]) >= 0) {
425 t += tx * tx + ty * ty;
433 t += tx * tx + ty * ty;
434 f +=
p->loop_penalty[i] * t;
445 for (j =
p->priv_n; j--;) {
446 p->priv_cp[j] += f * h[j];
449 fprintf(
p->priv_fp,
"### qps_linmin() step %f\n", f);
450 for (j = 0; j <
p->priv_n; j++) {
451 fprintf(
p->priv_fp,
"%f\n",
p->priv_cp[j]);
482#if defined(QPS_PRECON)
483 h[j] *=
p->priv_pcgt[j];
488 for (i = 0; i < 2 * n; i++) {
491 fprintf(
p->priv_fp,
"### qps_cgmin() top\n");
492 for (j = 0; j <
p->priv_n; j++) {
493 fprintf(
p->priv_fp,
"%f\n",
p->priv_cp[j]);
500 qps_linmin(
p, dgg, h);
502 p->priv_f = qps_func(
p);
503 if (fabs((
p->priv_f) - fp) <=
513#if defined(QPS_PRECON)
514 t *=
p->priv_pcgt[j];
522#if defined(QPS_PRECON)
523 t *=
p->priv_pcgt[j];
525 h[j] = t + gam * h[j];
529 fprintf(
p->priv_fp,
"### CG ITERS=%d %d %d\n", i,
p->cog_num,
p->loop_num);
532 fprintf(stderr,
"### Too many iterations in qps_cgmin()\n");
533#if defined(QPS_DEBUG)
534 fprintf(
p->priv_fp,
"### Too many iterations in qps_cgmin()\n");
547#if defined(QPS_DEBUG)
548 p->priv_fp = fopen(QPS_DEBUG_FILE,
"a");
553 fprintf(
p->priv_fp,
"### n=%d gn=%d ln=%d\n",
p->num_cells,
p->cog_num,
556 fprintf(
p->priv_fp,
"### (c w) values\n");
557 for (i = 0; i <
p->num_cells; i++) {
558 fprintf(
p->priv_fp,
"net %d: ", i);
559 while (
p->connect[pr] >= 0) {
560 fprintf(
p->priv_fp,
"(%d %f) ",
p->connect[pr],
p->edge_weight[pr]);
563 fprintf(
p->priv_fp,
"(-1 -1.0)\n");
566 fprintf(
p->priv_fp,
"### (x y f) values\n");
567 for (i = 0; i <
p->num_cells; i++) {
568 fprintf(
p->priv_fp,
"cell %d: (%f %f %d)\n", i,
p->x[i],
p->y[i],
573 fprintf(
p->priv_fp,
"### ga values\n");
574 for (i = 0; i <
p->num_cells; i++) {
575 fprintf(
p->priv_fp,
"cell %d: (%f)\n", i,
p->area[i]);
579 fprintf(
p->priv_fp,
"### gl values\n");
580 for (i = 0; i <
p->cog_num; i++) {
581 fprintf(
p->priv_fp,
"cog %d: ", i);
582 while (
p->cog_list[pr] >= 0) {
583 fprintf(
p->priv_fp,
"%d ",
p->cog_list[pr]);
586 fprintf(
p->priv_fp,
"-1\n");
589 fprintf(
p->priv_fp,
"### (gx gy) values\n");
590 for (i = 0; i <
p->cog_num; i++) {
591 fprintf(
p->priv_fp,
"cog %d: (%f %f)\n", i,
p->cog_x[i],
p->cog_y[i]);
596 p->priv_ii = (
int *)
malloc(
p->num_cells *
sizeof(
int));
605 for (i = 0; i <
p->num_cells; i++) {
606 while ((j =
p->connect[pr]) >= 0) {
615 p->priv_cc = (
int *)
malloc(pw *
sizeof(
int));
617 p->priv_cr = (
int *)
malloc(
p->num_cells *
sizeof(
int));
625 for (i = 0; i <
p->num_cells; i++) {
627 while ((j =
p->connect[pr]) >= 0) {
629 p->priv_cc[pw] =
p->connect[pr];
630 p->priv_ct[pw] =
p->edge_weight[pr];
636 p->priv_ct[pw] = -1.0;
645 p->priv_tp2 =
p->priv_tp + 2 *
p->num_cells;
664 for (i = 2 *
p->num_cells; i--;) {
668 for (i = 0; i <
p->cog_num; i++) {
669 while ((cell =
p->cog_list[j]) >= 0) {
670 t1[cell * 2] =
p->cog_x[i];
671 t1[cell * 2 + 1] =
p->cog_y[i];
681 t = (
p->max_x *
p->max_x +
p->max_y *
p->max_y);
683 for (i =
p->num_cells; i--;) {
709#if defined(QPS_HOIST)
716#if defined(QPS_HOIST)
718 p->priv_cw =
p->priv_ct;
721 for(i=
p->priv_cm; i--;) {
722 p->priv_cw[i] =
p->priv_ct[i];
726 for (i = 0; i <
p->loop_num; i++) {
727 while ((j =
p->priv_la[pr++]) != -1) {
729 p->priv_cw[j] +=
p->loop_penalty[i];
736 p->priv_cw =
p->priv_ct;
741 if (
p->max_enable ||
p->loop_num) {
742 if (
p->max_enable == 1 || (
p->loop_num &&
p->loop_k == 0)) {
744 p->priv_fmax =
p->priv_f;
745 p->priv_fprev =
p->priv_f;
746 p->priv_fopt = qps_estopt(
p);
751 if (
p->priv_f <
p->priv_fprev &&
752 (
p->priv_fprev -
p->priv_f) >
759 p->priv_fprev =
p->priv_f;
760 if (
p->priv_fmax <
p->priv_f) {
761 p->priv_fmax =
p->priv_f;
763 if (
p->priv_f >=
p->priv_fopt) {
764 p->priv_fopt =
p->priv_fmax * 2.0;
767 fprintf(
p->priv_fp,
"### warning: changing fopt\n");
772 fprintf(
p->priv_fp,
"### max_stat %.2e %.2e %.2e %.2e\n",
773 p->priv_f,
p->priv_eps,
p->priv_fmax,
p->priv_fopt);
781 fprintf(
p->priv_fp,
"### begin_update %d\n",
p->loop_k);
785#if defined(QPS_HOIST)
788 for (i = 0; i <
p->loop_num; i++) {
790 j = st =
p->loop_list[pr++];
791 jx = sx =
p->priv_tp[j * 2];
792 jy = sy =
p->priv_tp[j * 2 + 1];
793 while ((k =
p->loop_list[pr]) >= 0) {
794 kx =
p->priv_tp[k * 2];
795 ky =
p->priv_tp[k * 2 + 1];
798 t += tx * tx + ty * ty;
806 t += tx * tx + ty * ty;
807 p->priv_lt[i] = t -
p->loop_max[i];
814 for (i =
p->loop_num; i--;) {
815 if (
p->loop_penalty[i] != 0.0) {
816 fprintf(
p->priv_fp,
"### penalty %d %.2e\n", i,
p->loop_penalty[i]);
821 for (i =
p->loop_num; i--;) {
822 if (
p->priv_lt[i] > 0.0 ||
p->loop_penalty[i] > 0.0) {
823 t +=
p->priv_lt[i] *
p->priv_lt[i];
827 fprintf(
p->priv_fp,
"### skip %d %f\n", i,
p->priv_lt[i]);
833 p->loop_penalty[i] *
p->priv_lt[i] < -z)) {
836 fprintf(
p->priv_fp,
"### not_done %d %f %f %f %f\n", i,
837 p->priv_lt[i], z,
p->loop_max[i],
p->loop_penalty[i]);
842 fprintf(
p->priv_fp,
"### done %d %f %f %f %f\n", i,
843 p->priv_lt[i], z,
p->loop_max[i],
p->loop_penalty[i]);
849 t =
p->priv_eps * (
p->priv_fopt -
p->priv_f) / t;
851 for (i =
p->loop_num; i--;) {
852 pm1 =
p->loop_penalty[i];
854 fprintf(
p->priv_fp,
"### update %d %.2e %.2e %.2e %.2e %.2e\n", i,
855 t,
p->priv_lt[i], t *
p->priv_lt[i], pm1,
p->loop_max[i]);
857 p->loop_penalty[i] += t *
p->priv_lt[i];
858 if (
p->loop_penalty[i] < 0.0) {
859 p->loop_penalty[i] = 0.0;
861 pm2 =
p->loop_penalty[i];
862 tp += fabs(pm1 - pm2);
865 fprintf(
p->priv_fp,
"### penalty mag %f\n", tp);
873 fprintf(
p->priv_fp,
"### begin_max_update %d\n",
p->max_enable);
876 for (i =
p->num_cells; i--;) {
883 fprintf(
p->priv_fp,
"### nxl %d %f %f\n", i, z,
p->priv_mxl[i]);
886 z = (
p->x[i] -
p->max_x);
892 fprintf(
p->priv_fp,
"### nxh %d %f %f\n", i, z,
p->priv_mxh[i]);
901 fprintf(
p->priv_fp,
"### nyl %d %f %f\n", i, z,
p->priv_myl[i]);
904 z = (
p->y[i] -
p->max_y);
910 fprintf(
p->priv_fp,
"### nyh %d %f %f\n", i, z,
p->priv_myh[i]);
915 fprintf(
p->priv_fp,
"### max_done %d %f\n",
p->max_done, t);
918 t =
p->priv_eps * (
p->priv_fopt -
p->priv_f) / t;
920 for (i =
p->num_cells; i--;) {
922 pm1 =
p->priv_mxl[i];
923 p->priv_mxl[i] += t * z;
924 if (
p->priv_mxl[i] < 0.0) {
925 p->priv_mxl[i] = 0.0;
927 pm2 =
p->priv_mxl[i];
928 tp += fabs(pm1 - pm2);
930 z = (
p->x[i] -
p->max_x);
931 pm1 =
p->priv_mxh[i];
932 p->priv_mxh[i] += t * z;
933 if (
p->priv_mxh[i] < 0.0) {
934 p->priv_mxh[i] = 0.0;
936 pm2 =
p->priv_mxh[i];
937 tp += fabs(pm1 - pm2);
940 pm1 =
p->priv_myl[i];
941 p->priv_myl[i] += t * z;
942 if (
p->priv_myl[i] < 0.0) {
943 p->priv_myl[i] = 0.0;
945 pm2 =
p->priv_myl[i];
946 tp += fabs(pm1 - pm2);
948 z = (
p->y[i] -
p->max_y);
949 pm1 =
p->priv_myh[i];
950 p->priv_myh[i] += t * z;
951 if (
p->priv_myh[i] < 0.0) {
952 p->priv_myh[i] = 0.0;
954 pm2 =
p->priv_myh[i];
955 tp += fabs(pm1 - pm2);
959 for (i =
p->num_cells; i--;) {
960 fprintf(
p->priv_fp,
"### max_penalty %d %f %f %f %f\n", i,
961 p->priv_mxl[i],
p->priv_mxh[i],
p->priv_myl[i],
p->priv_myh[i]);
988#if defined(QPS_PRECON)
993#if defined(QPS_HOIST)
1003 p->priv_mxh =
p->priv_mxl +
p->num_cells;
1004 p->priv_myl =
p->priv_mxl + 2 *
p->num_cells;
1005 p->priv_myh =
p->priv_mxl + 3 *
p->num_cells;
1006 for (i = 4 *
p->num_cells; i--;) {
1007 p->priv_mxl[i] = 0.0;
1012 for (i =
p->num_cells; i--;) {
1013 p->priv_ii[i] = (
p->fixed[i]) ? (-1) : (0);
1018 p->priv_gt = (
int *)
malloc(
p->cog_num *
sizeof(
int));
1023 for (i = 0; i <
p->cog_num; i++) {
1027 while ((j =
p->cog_list[pr++]) >= 0) {
1030 if (
p->area[j] > bk) {
1038 p->priv_ii[tk] = -2 - i;
1045 for (i = 0; i <
p->cog_num; i++) {
1046 while ((j =
p->cog_list[pr]) >= 0) {
1047 p->priv_gw[pr] =
p->area[j] /
p->priv_gm[i];
1050 p->priv_gw[pr] = -1.0;
1057 for (i =
p->num_cells; i--;) {
1058 if (!
p->priv_ii[i]) {
1059 p->priv_ii[i] = 2 * (
p->priv_n++);
1065 for (i = 0; i <
p->num_cells; i++) {
1066 fprintf(
p->priv_fp,
"### ii %d %d\n", i,
p->priv_ii[i]);
1070#if defined(QPS_PRECON)
1075 for (i =
p->num_cells; i--;) {
1076 p->priv_pcg[i] = 0.0;
1079 for (i = 0; i <
p->num_cells; i++) {
1080 while ((c =
p->priv_cc[pr]) >= 0) {
1082 p->priv_pcg[i] += t;
1083 p->priv_pcg[c] += t;
1089 for (i = 0; i <
p->loop_num; i++) {
1090 t = 2.0 *
p->loop_penalty[i];
1091 while ((c =
p->loop_list[pr++]) >= 0) {
1092 p->priv_pcg[c] += t;
1097 for (i =
p->num_cells; i--;) {
1098 fprintf(
p->priv_fp,
"### precon %d %.2e\n", i,
p->priv_pcg[i]);
1101 for (i =
p->priv_n; i--;) {
1102 p->priv_pcgt[i] = 0.0;
1104 for (i = 0; i <
p->num_cells; i++) {
1108 p->priv_pcgt[c] += t;
1109 p->priv_pcgt[c + 1] += t;
1113 pr =
p->priv_gt[-(c+2)];
1114 while ((j =
p->cog_list[pr++]) >= 0) {
1117 w =
p->area[j] /
p->area[i];
1118 t = w * w *
p->priv_pcg[i];
1119 p->priv_pcgt[ji] += t;
1120 p->priv_pcgt[ji + 1] += t;
1126 for (i = 0; i <
p->priv_n; i++) {
1127 t =
p->priv_pcgt[i];
1129 p->priv_pcgt[i] = 1.0;
1132 p->priv_pcgt[i] = 1.0 /
p->priv_pcgt[i];
1142 p->priv_g =
p->priv_cp +
p->priv_n;
1143 p->priv_h =
p->priv_cp + 2 *
p->priv_n;
1144 p->priv_xi =
p->priv_cp + 3 *
p->priv_n;
1147 for (i =
p->num_cells; i--;) {
1148 if (
p->priv_ii[i] >= 0) {
1149 p->priv_cp[
p->priv_ii[i]] =
p->x[i];
1150 p->priv_cp[
p->priv_ii[i] + 1] =
p->y[i];
1157#if defined(QPS_HOIST)
1159 for (i=
p->loop_num; i--;) {
1160 while (
p->loop_list[pr++] >= 0) {
1165 p->priv_la = (
int *)
malloc(pr *
sizeof(
int));
1168 for (i = 0; i <
p->loop_num; i++) {
1169 j = st =
p->loop_list[pr++];
1170 while ((k =
p->loop_list[pr]) >= 0) {
1180 pw =
p->priv_cr[m1];
1181 while (
p->priv_cc[pw] != m2) {
1183 if (
p->priv_cc[pw] < 0) {
1189 p->priv_la[pr-1] = pw;
1202 pw =
p->priv_cr[m1];
1203 while (
p->priv_cc[pw] != m2) {
1205 if (
p->priv_cc[pw] < 0) {
1211 p->priv_la[pr-1] = pw;
1212 p->priv_la[pr] = -1;
1220 }
while (!
p->loop_done || !
p->max_done);
1224 for (i =
p->num_cells; i--;) {
1225 p->x[i] =
p->priv_tp[i * 2];
1226 p->y[i] =
p->priv_tp[i * 2 + 1];
1229 for (i =
p->num_cells; i--;) {
1230 fprintf(
p->priv_fp,
"### cloc %d %f %f\n", i,
p->x[i],
p->y[i]);
1235 if (
p->max_enable) {
1245#if defined(QPS_HOIST)
1250#if defined(QPS_PRECON)
1268#if defined(QPS_DEBUG)
#define ABC_NAMESPACE_IMPL_START
#define ABC_NAMESPACE_IMPL_END
#define QPS_STEPSIZE_RETRIES
void qps_init(qps_problem_t *p)
void qps_solve(qps_problem_t *p)
void qps_clean(qps_problem_t *p)
struct qps_problem qps_problem_t
ABC_NAMESPACE_HEADER_START typedef float qps_float_t