1	converged=False	diverged=False	epoch=0	gradient-norm=0.8506920907489984	learning-rate=0.0003	perplexity-train=94.03522029206755	perplexity-val=44.46403968411314	time-elapsed=321.7481052875519	used-gpu-memory=3355
2	converged=False	diverged=False	epoch=1	gradient-norm=0.7399031671623405	learning-rate=0.0003	perplexity-train=28.095741539075625	perplexity-val=25.619204646837918	time-elapsed=649.0971975326538	used-gpu-memory=3355
3	converged=False	diverged=False	epoch=2	gradient-norm=0.7296896622021012	learning-rate=0.0003	perplexity-train=18.84686991042711	perplexity-val=19.685674310639065	time-elapsed=975.4809310436249	used-gpu-memory=3355
4	converged=False	diverged=False	epoch=3	gradient-norm=0.7318891746190189	learning-rate=0.0003	perplexity-train=14.923297341125664	perplexity-val=16.490099158045734	time-elapsed=1302.213172674179	used-gpu-memory=3355
5	converged=False	diverged=False	epoch=4	gradient-norm=0.6384166090518851	learning-rate=0.0003	perplexity-train=12.780017722209914	perplexity-val=14.488071462390824	time-elapsed=1628.5764095783234	used-gpu-memory=3355
6	converged=False	diverged=False	epoch=4	gradient-norm=0.8395878207318718	learning-rate=0.0003	perplexity-train=11.17799236331008	perplexity-val=13.289013717469187	time-elapsed=1954.9160623550415	used-gpu-memory=3355
7	converged=False	diverged=False	epoch=5	gradient-norm=0.6779408222556195	learning-rate=0.0003	perplexity-train=10.284813429869258	perplexity-val=12.411809918344591	time-elapsed=2282.5044026374817	used-gpu-memory=3359
8	converged=False	diverged=False	epoch=6	gradient-norm=0.6736129106202391	learning-rate=0.0003	perplexity-train=9.544503274043755	perplexity-val=11.795095471151596	time-elapsed=2609.122333049774	used-gpu-memory=3359
9	converged=False	diverged=False	epoch=7	gradient-norm=0.6796706669014174	learning-rate=0.0003	perplexity-train=8.97552264417145	perplexity-val=11.231379942105562	time-elapsed=2935.6666164398193	used-gpu-memory=3359
10	converged=False	diverged=False	epoch=8	gradient-norm=0.7805231374315386	learning-rate=0.0003	perplexity-train=8.648905321965925	perplexity-val=10.788373270081468	time-elapsed=3262.5810256004333	used-gpu-memory=3359
11	converged=False	diverged=False	epoch=8	gradient-norm=0.6487831673639821	learning-rate=0.0003	perplexity-train=8.182792371146586	perplexity-val=10.450870274283957	time-elapsed=3588.3496522903442	used-gpu-memory=3359
12	converged=False	diverged=False	epoch=9	gradient-norm=0.6496854388108504	learning-rate=0.0003	perplexity-train=7.871067370199547	perplexity-val=10.21777589089159	time-elapsed=3915.5888645648956	used-gpu-memory=3359
13	converged=False	diverged=False	epoch=10	gradient-norm=0.6180969694910174	learning-rate=0.0003	perplexity-train=7.68864698808393	perplexity-val=9.939162766223447	time-elapsed=4242.141668319702	used-gpu-memory=3359
14	converged=False	diverged=False	epoch=11	gradient-norm=0.7572392924795062	learning-rate=0.0003	perplexity-train=7.454149645355215	perplexity-val=9.639312416854372	time-elapsed=4569.067020654678	used-gpu-memory=3359
15	converged=False	diverged=False	epoch=12	gradient-norm=0.6767979071547099	learning-rate=0.0003	perplexity-train=7.316537562487715	perplexity-val=9.431205460959044	time-elapsed=4896.070914745331	used-gpu-memory=3359
16	converged=False	diverged=False	epoch=12	gradient-norm=0.6092306151646595	learning-rate=0.0003	perplexity-train=7.083036458051333	perplexity-val=9.322955363538366	time-elapsed=5222.485599040985	used-gpu-memory=3359
17	converged=False	diverged=False	epoch=13	gradient-norm=0.6070314744818406	learning-rate=0.0003	perplexity-train=6.930670690255465	perplexity-val=9.19584346620895	time-elapsed=5548.986966848373	used-gpu-memory=3359
18	converged=False	diverged=False	epoch=14	gradient-norm=0.6332608220014254	learning-rate=0.0003	perplexity-train=6.823621499508219	perplexity-val=9.01477805530899	time-elapsed=5876.08164191246	used-gpu-memory=3361
19	converged=False	diverged=False	epoch=15	gradient-norm=0.5961970923949389	learning-rate=0.0003	perplexity-train=6.709140679390933	perplexity-val=8.877448520571182	time-elapsed=6201.93093919754	used-gpu-memory=3361
20	converged=False	diverged=False	epoch=16	gradient-norm=0.5832027179034777	learning-rate=0.0003	perplexity-train=6.657111246125711	perplexity-val=8.844883740017192	time-elapsed=6528.658984422684	used-gpu-memory=3361
21	converged=False	diverged=False	epoch=16	gradient-norm=0.6343154435413495	learning-rate=0.0003	perplexity-train=6.493964939558161	perplexity-val=8.711836914551023	time-elapsed=6855.169038057327	used-gpu-memory=3361
22	converged=False	diverged=False	epoch=17	gradient-norm=0.5712219869544073	learning-rate=0.0003	perplexity-train=6.398304580677887	perplexity-val=8.640126803545309	time-elapsed=7182.1144552230835	used-gpu-memory=3361
23	converged=False	diverged=False	epoch=18	gradient-norm=0.6812020524209063	learning-rate=0.0003	perplexity-train=6.333274246078663	perplexity-val=8.589042883895258	time-elapsed=7508.414121866226	used-gpu-memory=3361
24	converged=False	diverged=False	epoch=19	gradient-norm=0.5965837728827503	learning-rate=0.0003	perplexity-train=6.273684677002164	perplexity-val=8.436539536123963	time-elapsed=7834.793417930603	used-gpu-memory=3361
25	converged=False	diverged=False	epoch=20	gradient-norm=0.5709202674286283	learning-rate=0.0003	perplexity-train=6.248034897939909	perplexity-val=8.42674974531371	time-elapsed=8161.381982326508	used-gpu-memory=3361
26	converged=False	diverged=False	epoch=20	gradient-norm=0.5776559603002334	learning-rate=0.0003	perplexity-train=6.123486800431123	perplexity-val=8.318594673912246	time-elapsed=8486.887112617493	used-gpu-memory=3361
27	converged=False	diverged=False	epoch=21	gradient-norm=0.5721803922638979	learning-rate=0.0003	perplexity-train=6.056693476607522	perplexity-val=8.351548730859928	time-elapsed=8812.337401151657	used-gpu-memory=3361
28	converged=False	diverged=False	epoch=22	gradient-norm=0.534430746521221	learning-rate=0.0003	perplexity-train=6.017699398177429	perplexity-val=8.210832944254488	time-elapsed=9138.891773700714	used-gpu-memory=3361
29	converged=False	diverged=False	epoch=23	gradient-norm=0.600283333268067	learning-rate=0.0003	perplexity-train=5.988316808565238	perplexity-val=8.179183544438406	time-elapsed=9465.638230085373	used-gpu-memory=3361
30	converged=False	diverged=False	epoch=24	gradient-norm=0.6369004569678895	learning-rate=0.0003	perplexity-train=5.949856946969512	perplexity-val=8.119739613528678	time-elapsed=9791.459475517273	used-gpu-memory=3361
31	converged=False	diverged=False	epoch=24	gradient-norm=0.5393944841959462	learning-rate=0.0003	perplexity-train=5.8646590075102605	perplexity-val=8.092238246411886	time-elapsed=10116.84341454506	used-gpu-memory=3361
32	converged=False	diverged=False	epoch=25	gradient-norm=0.7008724657341867	learning-rate=0.0003	perplexity-train=5.81771927557749	perplexity-val=8.034192213108932	time-elapsed=10441.778031349182	used-gpu-memory=3361
33	converged=False	diverged=False	epoch=26	gradient-norm=0.5338071423132168	learning-rate=0.0003	perplexity-train=5.797245700988984	perplexity-val=7.981235748214754	time-elapsed=10768.21429181099	used-gpu-memory=3361
34	converged=False	diverged=False	epoch=27	gradient-norm=0.726632097031403	learning-rate=0.0003	perplexity-train=5.754805451754135	perplexity-val=7.991499619889684	time-elapsed=11093.849967241287	used-gpu-memory=3361
35	converged=False	diverged=False	epoch=28	gradient-norm=0.4815704035167528	learning-rate=0.0003	perplexity-train=5.751376190357084	perplexity-val=8.014987928055044	time-elapsed=11418.834760665894	used-gpu-memory=3361
36	converged=False	diverged=False	epoch=28	gradient-norm=0.577111584197182	learning-rate=0.0003	perplexity-train=5.666472564686274	perplexity-val=7.850229166212419	time-elapsed=11743.223725795746	used-gpu-memory=3361
37	converged=False	diverged=False	epoch=29	gradient-norm=0.5789928687901414	learning-rate=0.0003	perplexity-train=5.628964231880396	perplexity-val=7.819123149265575	time-elapsed=12069.336667060852	used-gpu-memory=3361
38	converged=False	diverged=False	epoch=30	gradient-norm=0.5061811431310714	learning-rate=0.0003	perplexity-train=5.61962968367102	perplexity-val=7.790508149651661	time-elapsed=12395.852680921555	used-gpu-memory=3361
39	converged=False	diverged=False	epoch=31	gradient-norm=0.5601208648090145	learning-rate=0.0003	perplexity-train=5.598840957557529	perplexity-val=7.779601665291303	time-elapsed=12722.56296825409	used-gpu-memory=3361
40	converged=False	diverged=False	epoch=32	gradient-norm=0.5556095258743493	learning-rate=0.0003	perplexity-train=5.581911042039072	perplexity-val=7.711578982182139	time-elapsed=13049.077702760696	used-gpu-memory=3361
41	converged=False	diverged=False	epoch=32	gradient-norm=0.5428594936653472	learning-rate=0.0003	perplexity-train=5.54057586440728	perplexity-val=7.743645632981073	time-elapsed=13374.24034690857	used-gpu-memory=3361
42	converged=False	diverged=False	epoch=33	gradient-norm=0.5405308299660387	learning-rate=0.0003	perplexity-train=5.491758425401637	perplexity-val=7.682164923879493	time-elapsed=13701.280247688293	used-gpu-memory=3361
43	converged=False	diverged=False	epoch=34	gradient-norm=0.49708248151771706	learning-rate=0.0003	perplexity-train=5.46512975398586	perplexity-val=7.672342941115625	time-elapsed=14029.221430063248	used-gpu-memory=3361
44	converged=False	diverged=False	epoch=35	gradient-norm=0.4948758586911345	learning-rate=0.0003	perplexity-train=5.457255345832471	perplexity-val=7.66606685416712	time-elapsed=14355.983130216599	used-gpu-memory=3361
45	converged=False	diverged=False	epoch=36	gradient-norm=0.5270219659191956	learning-rate=0.0003	perplexity-train=5.4579379013596565	perplexity-val=7.632565403915467	time-elapsed=14686.395551919937	used-gpu-memory=3361
46	converged=False	diverged=False	epoch=36	gradient-norm=0.5443423795568363	learning-rate=0.0003	perplexity-train=5.410199279886937	perplexity-val=7.623859039341883	time-elapsed=15014.397331237793	used-gpu-memory=3361
47	converged=False	diverged=False	epoch=37	gradient-norm=0.4947696113351763	learning-rate=0.0003	perplexity-train=5.367040908242883	perplexity-val=7.603988435732944	time-elapsed=15342.52334356308	used-gpu-memory=3361
48	converged=False	diverged=False	epoch=38	gradient-norm=0.5016320398710841	learning-rate=0.0003	perplexity-train=5.341444532159128	perplexity-val=7.550437620962975	time-elapsed=15668.13803601265	used-gpu-memory=3361
49	converged=False	diverged=False	epoch=39	gradient-norm=0.4859416435473065	learning-rate=0.0003	perplexity-train=5.3652953201833435	perplexity-val=7.51312015412598	time-elapsed=15996.386075258255	used-gpu-memory=3361
50	converged=False	diverged=False	epoch=40	gradient-norm=0.4897359358690441	learning-rate=0.0003	perplexity-train=5.346006048758046	perplexity-val=7.526825014308736	time-elapsed=16322.993819713593	used-gpu-memory=3361
51	converged=False	diverged=False	epoch=40	gradient-norm=0.5398778885604162	learning-rate=0.0003	perplexity-train=5.2999412610851895	perplexity-val=7.448758165351426	time-elapsed=16649.044306993484	used-gpu-memory=3361
52	converged=False	diverged=False	epoch=41	gradient-norm=0.5001093005240085	learning-rate=0.0003	perplexity-train=5.279218810894613	perplexity-val=7.438691650763949	time-elapsed=16976.044377088547	used-gpu-memory=3361
53	converged=False	diverged=False	epoch=42	gradient-norm=0.49738553804059316	learning-rate=0.0003	perplexity-train=5.248955074984582	perplexity-val=7.397802165498418	time-elapsed=17302.318118333817	used-gpu-memory=3361
54	converged=False	diverged=False	epoch=43	gradient-norm=0.557384277258109	learning-rate=0.0003	perplexity-train=5.270300849348662	perplexity-val=7.451353324637817	time-elapsed=17629.737084388733	used-gpu-memory=3361
55	converged=False	diverged=False	epoch=44	gradient-norm=0.5750694773518062	learning-rate=0.0003	perplexity-train=5.245061937588573	perplexity-val=7.466848880899684	time-elapsed=17956.186103582382	used-gpu-memory=3361
56	converged=False	diverged=False	epoch=44	gradient-norm=0.443296315226172	learning-rate=0.0003	perplexity-train=5.22072199155471	perplexity-val=7.376774961894463	time-elapsed=18283.24115729332	used-gpu-memory=3361
57	converged=False	diverged=False	epoch=45	gradient-norm=0.4818426135651996	learning-rate=0.0003	perplexity-train=5.201945050225265	perplexity-val=7.423855214069615	time-elapsed=18611.074991941452	used-gpu-memory=3361
58	converged=False	diverged=False	epoch=46	gradient-norm=0.5333011317271881	learning-rate=0.0003	perplexity-train=5.173524908474285	perplexity-val=7.397793196039819	time-elapsed=18938.662901878357	used-gpu-memory=3361
59	converged=False	diverged=False	epoch=47	gradient-norm=0.5690968259715086	learning-rate=0.0003	perplexity-train=5.16760875424129	perplexity-val=7.387831084788999	time-elapsed=19265.883038282394	used-gpu-memory=3361
60	converged=False	diverged=False	epoch=48	gradient-norm=0.4533514394507489	learning-rate=0.0003	perplexity-train=5.185702241091749	perplexity-val=7.388041200275516	time-elapsed=19591.8679997921	used-gpu-memory=3361
61	converged=False	diverged=False	epoch=48	gradient-norm=0.5030422803677491	learning-rate=0.0003	perplexity-train=5.147533350717077	perplexity-val=7.345028997563343	time-elapsed=19918.360692977905	used-gpu-memory=3361
62	converged=False	diverged=False	epoch=49	gradient-norm=0.42752287416987333	learning-rate=0.0003	perplexity-train=5.111607748117565	perplexity-val=7.304151250260739	time-elapsed=20245.950645446777	used-gpu-memory=3361
63	converged=False	diverged=False	epoch=50	gradient-norm=0.4863004091299449	learning-rate=0.0003	perplexity-train=5.116495890242787	perplexity-val=7.31336757845619	time-elapsed=20572.86965060234	used-gpu-memory=3361
64	converged=False	diverged=False	epoch=51	gradient-norm=0.47001161333009794	learning-rate=0.0003	perplexity-train=5.1074456834608	perplexity-val=7.411215631150622	time-elapsed=20899.931036949158	used-gpu-memory=3361
65	converged=False	diverged=False	epoch=52	gradient-norm=0.49274229575683554	learning-rate=0.0003	perplexity-train=5.104245770479922	perplexity-val=7.27968441596697	time-elapsed=21227.318863391876	used-gpu-memory=3361
66	converged=False	diverged=False	epoch=52	gradient-norm=0.4579388856764924	learning-rate=0.0003	perplexity-train=5.080519839334593	perplexity-val=7.3237621082704525	time-elapsed=21552.538875341415	used-gpu-memory=3361
67	converged=False	diverged=False	epoch=53	gradient-norm=0.5293832886011327	learning-rate=0.0003	perplexity-train=5.044448491976738	perplexity-val=7.298804002485712	time-elapsed=21879.279705047607	used-gpu-memory=3361
68	converged=False	diverged=False	epoch=54	gradient-norm=0.5035027755465034	learning-rate=0.0003	perplexity-train=5.061456517340176	perplexity-val=7.315429484405844	time-elapsed=22205.08693265915	used-gpu-memory=3361
69	converged=False	diverged=False	epoch=55	gradient-norm=0.46212964715270827	learning-rate=0.0003	perplexity-train=5.040153888821404	perplexity-val=7.253889174776489	time-elapsed=22530.368744134903	used-gpu-memory=3361
70	converged=False	diverged=False	epoch=56	gradient-norm=0.42358967056734087	learning-rate=0.0003	perplexity-train=5.041335021893196	perplexity-val=7.280428338444152	time-elapsed=22856.150404453278	used-gpu-memory=3361
71	converged=False	diverged=False	epoch=56	gradient-norm=0.4617665570502806	learning-rate=0.0003	perplexity-train=5.029674752995102	perplexity-val=7.198899284096761	time-elapsed=23181.90758037567	used-gpu-memory=3361
72	converged=False	diverged=False	epoch=57	gradient-norm=0.4890089611948307	learning-rate=0.0003	perplexity-train=5.001204079557873	perplexity-val=7.286346357311544	time-elapsed=23508.18829011917	used-gpu-memory=3361
73	converged=False	diverged=False	epoch=58	gradient-norm=0.47467728369053225	learning-rate=0.0003	perplexity-train=4.9808943805623205	perplexity-val=7.17400611513952	time-elapsed=23834.50973534584	used-gpu-memory=3361
74	converged=False	diverged=False	epoch=59	gradient-norm=0.4989780133156477	learning-rate=0.0003	perplexity-train=4.998227762966794	perplexity-val=7.205527476125016	time-elapsed=24160.346216201782	used-gpu-memory=3361
75	converged=False	diverged=False	epoch=60	gradient-norm=0.4539223733979701	learning-rate=0.0003	perplexity-train=4.98141641095894	perplexity-val=7.187526763855525	time-elapsed=24486.032418727875	used-gpu-memory=3361
76	converged=False	diverged=False	epoch=60	gradient-norm=0.4342538568932537	learning-rate=0.0003	perplexity-train=4.991745047336588	perplexity-val=7.1997771369657535	time-elapsed=24811.297179937363	used-gpu-memory=3361
77	converged=False	diverged=False	epoch=61	gradient-norm=0.505521804786188	learning-rate=0.0003	perplexity-train=4.928564891259398	perplexity-val=7.21131335350047	time-elapsed=25136.994992256165	used-gpu-memory=3361
78	converged=False	diverged=False	epoch=62	gradient-norm=0.48519732029883367	learning-rate=0.0003	perplexity-train=4.949111072330433	perplexity-val=7.213310193202342	time-elapsed=25462.483828544617	used-gpu-memory=3361
79	converged=False	diverged=False	epoch=63	gradient-norm=0.456415274437001	learning-rate=0.0003	perplexity-train=4.927905062840474	perplexity-val=7.214688685527801	time-elapsed=25788.051424264908	used-gpu-memory=3361
80	converged=False	diverged=False	epoch=64	gradient-norm=0.46821856517019855	learning-rate=0.0003	perplexity-train=4.95714199121285	perplexity-val=7.173924053546271	time-elapsed=26114.91632604599	used-gpu-memory=3361
81	converged=False	diverged=False	epoch=64	gradient-norm=0.4291434675742278	learning-rate=0.0003	perplexity-train=4.9327154796020025	perplexity-val=7.14825209526296	time-elapsed=26439.457611083984	used-gpu-memory=3361
82	converged=False	diverged=False	epoch=65	gradient-norm=0.433170359869257	learning-rate=0.0003	perplexity-train=4.894220588765585	perplexity-val=7.139907923193636	time-elapsed=26766.03003358841	used-gpu-memory=3361
83	converged=False	diverged=False	epoch=66	gradient-norm=0.4579456923644503	learning-rate=0.0003	perplexity-train=4.893499435884712	perplexity-val=7.175284983438479	time-elapsed=27091.76671552658	used-gpu-memory=3361
84	converged=False	diverged=False	epoch=67	gradient-norm=0.45930680214148323	learning-rate=0.0003	perplexity-train=4.90661209188243	perplexity-val=7.148196101484173	time-elapsed=27417.285153865814	used-gpu-memory=3361
85	converged=False	diverged=False	epoch=68	gradient-norm=0.4719287519776752	learning-rate=0.0003	perplexity-train=4.895082368232998	perplexity-val=7.13636052993925	time-elapsed=27743.20832467079	used-gpu-memory=3361
86	converged=False	diverged=False	epoch=68	gradient-norm=0.48816486386238506	learning-rate=0.0003	perplexity-train=4.892730928147186	perplexity-val=7.103125887691235	time-elapsed=28068.62573671341	used-gpu-memory=3361
87	converged=False	diverged=False	epoch=69	gradient-norm=0.4623902402237249	learning-rate=0.0003	perplexity-train=4.850431378758329	perplexity-val=7.108991237604832	time-elapsed=28393.642332792282	used-gpu-memory=3361
88	converged=False	diverged=False	epoch=70	gradient-norm=0.43804461515861565	learning-rate=0.0003	perplexity-train=4.866790722594818	perplexity-val=7.100819100227742	time-elapsed=28720.17977285385	used-gpu-memory=3361
89	converged=False	diverged=False	epoch=71	gradient-norm=0.5068303071989263	learning-rate=0.0003	perplexity-train=4.84476352290643	perplexity-val=7.126524067001059	time-elapsed=29046.326859235764	used-gpu-memory=3361
90	converged=False	diverged=False	epoch=72	gradient-norm=0.49763224983462545	learning-rate=0.0003	perplexity-train=4.867088198898095	perplexity-val=7.102879267412403	time-elapsed=29371.285359621048	used-gpu-memory=3361
91	converged=False	diverged=False	epoch=72	gradient-norm=0.4132006321082411	learning-rate=0.0003	perplexity-train=4.8575568283974615	perplexity-val=6.993580892269602	time-elapsed=29696.968616485596	used-gpu-memory=3361
92	converged=False	diverged=False	epoch=73	gradient-norm=0.40909621800157053	learning-rate=0.0003	perplexity-train=4.8145215581126415	perplexity-val=7.096107588922601	time-elapsed=30023.215575933456	used-gpu-memory=3361
93	converged=False	diverged=False	epoch=74	gradient-norm=0.43994690651733165	learning-rate=0.0003	perplexity-train=4.827500047425586	perplexity-val=7.078512368448735	time-elapsed=30348.512532234192	used-gpu-memory=3361
94	converged=False	diverged=False	epoch=75	gradient-norm=0.4437006827296427	learning-rate=0.0003	perplexity-train=4.8077696117448205	perplexity-val=7.091881479118676	time-elapsed=30674.137933254242	used-gpu-memory=3361
95	converged=False	diverged=False	epoch=76	gradient-norm=0.46376450886602544	learning-rate=0.0003	perplexity-train=4.834189716129892	perplexity-val=7.043690608443564	time-elapsed=31000.066351890564	used-gpu-memory=3361
96	converged=False	diverged=False	epoch=76	gradient-norm=0.48040715805664547	learning-rate=0.0003	perplexity-train=4.825974137035117	perplexity-val=7.011180694715062	time-elapsed=31325.118568897247	used-gpu-memory=3361
97	converged=False	diverged=False	epoch=77	gradient-norm=0.4080628850197942	learning-rate=0.0003	perplexity-train=4.77232172815153	perplexity-val=7.022349446620537	time-elapsed=31651.443430662155	used-gpu-memory=3361
98	converged=False	diverged=False	epoch=78	gradient-norm=0.44983400283807146	learning-rate=0.0003	perplexity-train=4.7925118606263295	perplexity-val=6.984474048920266	time-elapsed=31977.452726840973	used-gpu-memory=3361
99	converged=False	diverged=False	epoch=79	gradient-norm=0.4413572830299798	learning-rate=0.0003	perplexity-train=4.78201952078406	perplexity-val=7.077206719419534	time-elapsed=32301.971838712692	used-gpu-memory=3361
100	converged=False	diverged=False	epoch=80	gradient-norm=0.46319547928647486	learning-rate=0.0003	perplexity-train=4.794047721537519	perplexity-val=7.035701620671927	time-elapsed=32627.69130897522	used-gpu-memory=3361
101	converged=False	diverged=False	epoch=80	gradient-norm=0.401983280932588	learning-rate=0.0003	perplexity-train=4.792448577628386	perplexity-val=6.970044993675859	time-elapsed=32953.04908871651	used-gpu-memory=3361
102	converged=False	diverged=False	epoch=81	gradient-norm=0.41452273875745277	learning-rate=0.0003	perplexity-train=4.749825489621259	perplexity-val=7.015782104159219	time-elapsed=33278.37569475174	used-gpu-memory=3361
103	converged=False	diverged=False	epoch=82	gradient-norm=0.43434254556997254	learning-rate=0.0003	perplexity-train=4.75328889634192	perplexity-val=6.992053153622179	time-elapsed=33604.2210290432	used-gpu-memory=3361
104	converged=False	diverged=False	epoch=83	gradient-norm=0.43854569805020377	learning-rate=0.0003	perplexity-train=4.748482546215831	perplexity-val=7.004805582821436	time-elapsed=33929.36448144913	used-gpu-memory=3361
105	converged=False	diverged=False	epoch=84	gradient-norm=0.3970735286998163	learning-rate=0.0003	perplexity-train=4.7584687881052234	perplexity-val=7.022635110182351	time-elapsed=34254.878976106644	used-gpu-memory=3361
106	converged=False	diverged=False	epoch=84	gradient-norm=0.4269641722527061	learning-rate=0.0003	perplexity-train=4.767218706730724	perplexity-val=6.982531615672006	time-elapsed=34580.123168706894	used-gpu-memory=3361
107	converged=False	diverged=False	epoch=85	gradient-norm=0.42642108527333605	learning-rate=0.0003	perplexity-train=4.716913468895128	perplexity-val=6.957260719867596	time-elapsed=34905.95066571236	used-gpu-memory=3361
108	converged=False	diverged=False	epoch=86	gradient-norm=0.45621188248164596	learning-rate=0.0003	perplexity-train=4.723081629140745	perplexity-val=6.992398263624037	time-elapsed=35231.88329219818	used-gpu-memory=3361
109	converged=False	diverged=False	epoch=87	gradient-norm=0.4593876458598436	learning-rate=0.0003	perplexity-train=4.722686761191691	perplexity-val=6.914009659041347	time-elapsed=35557.885747909546	used-gpu-memory=3361
110	converged=False	diverged=False	epoch=88	gradient-norm=0.448542005298899	learning-rate=0.0003	perplexity-train=4.7304997043177375	perplexity-val=6.984145568119943	time-elapsed=35883.907977342606	used-gpu-memory=3361
111	converged=False	diverged=False	epoch=88	gradient-norm=0.43162064727180255	learning-rate=0.0003	perplexity-train=4.738193282524529	perplexity-val=6.953827756099559	time-elapsed=36209.13308787346	used-gpu-memory=3361
112	converged=False	diverged=False	epoch=89	gradient-norm=0.4481126415467439	learning-rate=0.0003	perplexity-train=4.69167850164013	perplexity-val=6.997354052583114	time-elapsed=36535.83171248436	used-gpu-memory=3361
113	converged=False	diverged=False	epoch=90	gradient-norm=0.397027508426508	learning-rate=0.0003	perplexity-train=4.696385938192361	perplexity-val=7.001040072023591	time-elapsed=36862.48151350021	used-gpu-memory=3361
114	converged=False	diverged=False	epoch=91	gradient-norm=0.44030525419838423	learning-rate=0.0003	perplexity-train=4.696388228932877	perplexity-val=6.985135180412601	time-elapsed=37187.26042342186	used-gpu-memory=3417
115	converged=False	diverged=False	epoch=92	gradient-norm=0.5297222406751263	learning-rate=0.0003	perplexity-train=4.6921642283390765	perplexity-val=6.976323843163854	time-elapsed=37512.38652586937	used-gpu-memory=3417
116	converged=False	diverged=False	epoch=92	gradient-norm=0.4666552874048141	learning-rate=0.0003	perplexity-train=4.719981231030671	perplexity-val=6.943709324457942	time-elapsed=37837.49160814285	used-gpu-memory=3417
117	converged=False	diverged=False	epoch=93	gradient-norm=0.3947897716403542	learning-rate=0.0003	perplexity-train=4.670249381744774	perplexity-val=6.945688615390117	time-elapsed=38162.634279727936	used-gpu-memory=3417
118	converged=False	diverged=False	epoch=94	gradient-norm=0.4027018226542863	learning-rate=0.00020999999999999998	perplexity-train=4.600794510574724	perplexity-val=6.9480014827969505	time-elapsed=38489.75541591644	used-gpu-memory=3417
119	converged=False	diverged=False	epoch=95	gradient-norm=0.49174939574835186	learning-rate=0.00020999999999999998	perplexity-train=4.6070202235256215	perplexity-val=6.897686122537083	time-elapsed=38815.93764305115	used-gpu-memory=3417
120	converged=False	diverged=False	epoch=96	gradient-norm=0.42843166621950984	learning-rate=0.00020999999999999998	perplexity-train=4.607701057569624	perplexity-val=6.9080934632586155	time-elapsed=39142.21670484543	used-gpu-memory=3417
121	converged=False	diverged=False	epoch=96	gradient-norm=0.47572234964419624	learning-rate=0.00020999999999999998	perplexity-train=4.603627374747001	perplexity-val=6.872503118057837	time-elapsed=39468.09905362129	used-gpu-memory=3417
122	converged=False	diverged=False	epoch=97	gradient-norm=0.4464129838842231	learning-rate=0.00020999999999999998	perplexity-train=4.566445716791233	perplexity-val=6.863974463169684	time-elapsed=39794.87662291527	used-gpu-memory=3417
123	converged=False	diverged=False	epoch=98	gradient-norm=0.41016887213400355	learning-rate=0.00020999999999999998	perplexity-train=4.55924498790571	perplexity-val=6.851685696173966	time-elapsed=40121.71932077408	used-gpu-memory=3417
124	converged=False	diverged=False	epoch=99	gradient-norm=0.4347405283579662	learning-rate=0.00020999999999999998	perplexity-train=4.570367400914027	perplexity-val=6.937020122736491	time-elapsed=40447.36279463768	used-gpu-memory=3417
125	converged=False	diverged=False	epoch=100	gradient-norm=0.3803638514767094	learning-rate=0.00020999999999999998	perplexity-train=4.568293936575203	perplexity-val=6.87871018523783	time-elapsed=40772.642907619476	used-gpu-memory=3417
126	converged=False	diverged=False	epoch=101	gradient-norm=0.4396049643727484	learning-rate=0.00020999999999999998	perplexity-train=4.576638213113957	perplexity-val=6.897114511218534	time-elapsed=41098.66006851196	used-gpu-memory=3417
127	converged=False	diverged=False	epoch=101	gradient-norm=0.3837448250116131	learning-rate=0.00020999999999999998	perplexity-train=4.543868177196598	perplexity-val=6.863930732667445	time-elapsed=41423.539956092834	used-gpu-memory=3417
128	converged=False	diverged=False	epoch=102	gradient-norm=0.40535851724918776	learning-rate=0.00020999999999999998	perplexity-train=4.535493314291426	perplexity-val=6.814873598264191	time-elapsed=41749.8184902668	used-gpu-memory=3417
129	converged=False	diverged=False	epoch=103	gradient-norm=0.4034052747037826	learning-rate=0.00020999999999999998	perplexity-train=4.550192309620866	perplexity-val=6.860768101176142	time-elapsed=42075.793184280396	used-gpu-memory=3417
130	converged=False	diverged=False	epoch=104	gradient-norm=0.40029172054091705	learning-rate=0.00020999999999999998	perplexity-train=4.544047580244311	perplexity-val=6.871832207657197	time-elapsed=42404.78179502487	used-gpu-memory=3417
131	converged=False	diverged=False	epoch=105	gradient-norm=0.4415976412810714	learning-rate=0.00020999999999999998	perplexity-train=4.550308152275454	perplexity-val=6.831338876246494	time-elapsed=42732.0538520813	used-gpu-memory=3417
132	converged=False	diverged=False	epoch=105	gradient-norm=0.4696878713527808	learning-rate=0.00020999999999999998	perplexity-train=4.519529723475039	perplexity-val=6.842602202083098	time-elapsed=43059.61301970482	used-gpu-memory=3417
133	converged=False	diverged=False	epoch=106	gradient-norm=0.4630388001224335	learning-rate=0.00020999999999999998	perplexity-train=4.527629943291673	perplexity-val=6.860211169049412	time-elapsed=43387.76334857941	used-gpu-memory=3417
134	converged=False	diverged=False	epoch=107	gradient-norm=0.4241358173711561	learning-rate=0.00020999999999999998	perplexity-train=4.524183268119752	perplexity-val=6.8324058400675	time-elapsed=43715.568476200104	used-gpu-memory=3417
135	converged=False	diverged=False	epoch=108	gradient-norm=0.48578232336692206	learning-rate=0.00020999999999999998	perplexity-train=4.518616446494967	perplexity-val=6.861463106965609	time-elapsed=44042.781566143036	used-gpu-memory=3417
136	converged=False	diverged=False	epoch=109	gradient-norm=0.4635647183689507	learning-rate=0.00020999999999999998	perplexity-train=4.54401170239572	perplexity-val=6.8710804563992545	time-elapsed=44369.217380046844	used-gpu-memory=3417
137	converged=False	diverged=False	epoch=109	gradient-norm=0.39155073718373096	learning-rate=0.00014699999999999997	perplexity-train=4.476749840439109	perplexity-val=6.826211200965586	time-elapsed=44696.23295402527	used-gpu-memory=3417
138	converged=False	diverged=False	epoch=110	gradient-norm=0.3869295208158403	learning-rate=0.00014699999999999997	perplexity-train=4.471317465799904	perplexity-val=6.8363222916735245	time-elapsed=45022.710173368454	used-gpu-memory=3417
139	converged=False	diverged=False	epoch=111	gradient-norm=0.5034590887643663	learning-rate=0.00014699999999999997	perplexity-train=4.451060955916818	perplexity-val=6.844605106787683	time-elapsed=45349.01574206352	used-gpu-memory=3417
140	converged=False	diverged=False	epoch=112	gradient-norm=0.43881419061243837	learning-rate=0.00014699999999999997	perplexity-train=4.470200582249439	perplexity-val=6.849938673761017	time-elapsed=45677.60744404793	used-gpu-memory=3417
141	converged=False	diverged=False	epoch=113	gradient-norm=0.43656087024444995	learning-rate=0.00014699999999999997	perplexity-train=4.461117340000588	perplexity-val=6.826676805266239	time-elapsed=46004.25722980499	used-gpu-memory=3417
142	converged=False	diverged=False	epoch=113	gradient-norm=0.42512589907843445	learning-rate=0.00014699999999999997	perplexity-train=4.435218885265663	perplexity-val=6.814071792917246	time-elapsed=46329.5141646862	used-gpu-memory=3417
143	converged=False	diverged=False	epoch=114	gradient-norm=0.4223832943806389	learning-rate=0.00014699999999999997	perplexity-train=4.446846725179406	perplexity-val=6.848216508546938	time-elapsed=46656.163491010666	used-gpu-memory=3417
144	converged=False	diverged=False	epoch=115	gradient-norm=0.40374371484654714	learning-rate=0.00014699999999999997	perplexity-train=4.427157954274464	perplexity-val=6.82996499318469	time-elapsed=46982.22524857521	used-gpu-memory=3417
145	converged=False	diverged=False	epoch=116	gradient-norm=0.42875458233825486	learning-rate=0.00014699999999999997	perplexity-train=4.438540444489964	perplexity-val=6.868734025077386	time-elapsed=47308.04705739021	used-gpu-memory=3417
146	converged=False	diverged=False	epoch=117	gradient-norm=0.38673870991898573	learning-rate=0.00014699999999999997	perplexity-train=4.448617168387923	perplexity-val=6.830442112726887	time-elapsed=47634.0184173584	used-gpu-memory=3417
147	converged=False	diverged=False	epoch=117	gradient-norm=0.41011135952132927	learning-rate=0.00014699999999999997	perplexity-train=4.422855201070472	perplexity-val=6.794845940628323	time-elapsed=47960.25499391556	used-gpu-memory=3417
148	converged=False	diverged=False	epoch=118	gradient-norm=0.4066357248841164	learning-rate=0.00014699999999999997	perplexity-train=4.423270435824857	perplexity-val=6.810490118484224	time-elapsed=48287.28960824013	used-gpu-memory=3417
149	converged=False	diverged=False	epoch=119	gradient-norm=0.40398169844425447	learning-rate=0.00014699999999999997	perplexity-train=4.419367554098166	perplexity-val=6.79775225224283	time-elapsed=48613.29436516762	used-gpu-memory=3417
150	converged=False	diverged=False	epoch=120	gradient-norm=0.4212216090498136	learning-rate=0.00014699999999999997	perplexity-train=4.431185526129251	perplexity-val=6.841074856858577	time-elapsed=48939.827456474304	used-gpu-memory=3417
151	converged=False	diverged=False	epoch=121	gradient-norm=0.4371256999915466	learning-rate=0.00014699999999999997	perplexity-train=4.422108229662649	perplexity-val=6.786076895175095	time-elapsed=49266.409528017044	used-gpu-memory=3417
152	converged=False	diverged=False	epoch=121	gradient-norm=0.4373623745782219	learning-rate=0.00014699999999999997	perplexity-train=4.405183448131102	perplexity-val=6.769110121802031	time-elapsed=49593.010996580124	used-gpu-memory=3417
153	converged=False	diverged=False	epoch=122	gradient-norm=0.462785237417426	learning-rate=0.00014699999999999997	perplexity-train=4.413657835188827	perplexity-val=6.831379576159113	time-elapsed=49918.39697313309	used-gpu-memory=3417
154	converged=False	diverged=False	epoch=123	gradient-norm=0.41111841604608834	learning-rate=0.00014699999999999997	perplexity-train=4.404028509367827	perplexity-val=6.866977416983377	time-elapsed=50244.07267856598	used-gpu-memory=3417
155	converged=False	diverged=False	epoch=124	gradient-norm=0.4301302707201015	learning-rate=0.00014699999999999997	perplexity-train=4.417208109873283	perplexity-val=6.805588238147081	time-elapsed=50570.13857603073	used-gpu-memory=3417
156	converged=False	diverged=False	epoch=125	gradient-norm=0.4414171720884166	learning-rate=0.00014699999999999997	perplexity-train=4.418216450184267	perplexity-val=6.799540246094318	time-elapsed=50895.95012664795	used-gpu-memory=3417
157	converged=False	diverged=False	epoch=125	gradient-norm=0.4259378688064779	learning-rate=0.00014699999999999997	perplexity-train=4.3970421339530485	perplexity-val=6.797094318485067	time-elapsed=51220.86370229721	used-gpu-memory=3417
158	converged=False	diverged=False	epoch=126	gradient-norm=0.41618761555151973	learning-rate=0.00014699999999999997	perplexity-train=4.393148115301128	perplexity-val=6.810742792741856	time-elapsed=51547.6173582077	used-gpu-memory=3417
159	converged=False	diverged=False	epoch=127	gradient-norm=0.42765569936377956	learning-rate=0.00014699999999999997	perplexity-train=4.395561654908917	perplexity-val=6.807634128717056	time-elapsed=51873.75053310394	used-gpu-memory=3417
160	converged=False	diverged=False	epoch=128	gradient-norm=0.4090924846826931	learning-rate=0.00014699999999999997	perplexity-train=4.398913842944852	perplexity-val=6.823265610499237	time-elapsed=52200.20144915581	used-gpu-memory=3417
161	converged=False	diverged=False	epoch=129	gradient-norm=0.47066506282707127	learning-rate=0.00010289999999999997	perplexity-train=4.361388251203895	perplexity-val=6.789055727016128	time-elapsed=52527.18896317482	used-gpu-memory=3417
162	converged=False	diverged=False	epoch=129	gradient-norm=0.4308644696727751	learning-rate=0.00010289999999999997	perplexity-train=4.368026005203347	perplexity-val=6.788376162713003	time-elapsed=52852.977630615234	used-gpu-memory=3417
163	converged=False	diverged=False	epoch=130	gradient-norm=0.425715169997964	learning-rate=0.00010289999999999997	perplexity-train=4.346264747604116	perplexity-val=6.795618266017251	time-elapsed=53179.19990682602	used-gpu-memory=3417
164	converged=False	diverged=False	epoch=131	gradient-norm=0.40938585690930707	learning-rate=0.00010289999999999997	perplexity-train=4.350610180265392	perplexity-val=6.766829210161924	time-elapsed=53505.73752737045	used-gpu-memory=3417
165	converged=False	diverged=False	epoch=132	gradient-norm=0.4291084854189508	learning-rate=0.00010289999999999997	perplexity-train=4.362990801902206	perplexity-val=6.799380800248465	time-elapsed=53831.539981126785	used-gpu-memory=3417
166	converged=False	diverged=False	epoch=133	gradient-norm=0.46916190258658425	learning-rate=0.00010289999999999997	perplexity-train=4.349308349125506	perplexity-val=6.772546062936261	time-elapsed=54157.168014764786	used-gpu-memory=3417
167	converged=False	diverged=False	epoch=133	gradient-norm=0.429115688080544	learning-rate=0.00010289999999999997	perplexity-train=4.345947075118382	perplexity-val=6.7605655772954965	time-elapsed=54483.273069381714	used-gpu-memory=3417
168	converged=False	diverged=False	epoch=134	gradient-norm=0.42149224867263774	learning-rate=0.00010289999999999997	perplexity-train=4.330936160459657	perplexity-val=6.7995645461671685	time-elapsed=54811.3584895134	used-gpu-memory=3417
169	converged=False	diverged=False	epoch=135	gradient-norm=0.4095359034352268	learning-rate=0.00010289999999999997	perplexity-train=4.3351391617168495	perplexity-val=6.76974665593972	time-elapsed=55137.14297294617	used-gpu-memory=3417
170	converged=False	diverged=False	epoch=136	gradient-norm=0.42110212789060786	learning-rate=0.00010289999999999997	perplexity-train=4.341867893193019	perplexity-val=6.793224432467329	time-elapsed=55463.13656544685	used-gpu-memory=3417
171	converged=False	diverged=False	epoch=137	gradient-norm=0.4461176724536313	learning-rate=0.00010289999999999997	perplexity-train=4.341262354298282	perplexity-val=6.762095091532795	time-elapsed=55788.91458249092	used-gpu-memory=3417
172	converged=False	diverged=False	epoch=137	gradient-norm=0.4471532561684983	learning-rate=0.00010289999999999997	perplexity-train=4.333246408477888	perplexity-val=6.783276053652927	time-elapsed=56113.76790928841	used-gpu-memory=3417
173	converged=False	diverged=False	epoch=138	gradient-norm=0.4369119818079433	learning-rate=0.00010289999999999997	perplexity-train=4.324735220601634	perplexity-val=6.788650242997046	time-elapsed=56440.20629668236	used-gpu-memory=3417
174	converged=False	diverged=False	epoch=139	gradient-norm=0.4063317017740137	learning-rate=0.00010289999999999997	perplexity-train=4.323434324597486	perplexity-val=6.791162642182321	time-elapsed=56765.92255806923	used-gpu-memory=3417
175	converged=False	diverged=False	epoch=140	gradient-norm=0.4214917212179861	learning-rate=0.00010289999999999997	perplexity-train=4.325854082832691	perplexity-val=6.781122223654776	time-elapsed=57091.54587697983	used-gpu-memory=3417
176	converged=False	diverged=False	epoch=141	gradient-norm=0.440668570748811	learning-rate=7.202999999999997e-05	perplexity-train=4.307701357607111	perplexity-val=6.7723579420852165	time-elapsed=57418.251596450806	used-gpu-memory=3417
177	converged=False	diverged=False	epoch=141	gradient-norm=0.46835660920490946	learning-rate=7.202999999999997e-05	perplexity-train=4.314333636223076	perplexity-val=6.7604603085003605	time-elapsed=57743.858874320984	used-gpu-memory=3417
178	converged=False	diverged=False	epoch=142	gradient-norm=0.41373634955124594	learning-rate=7.202999999999997e-05	perplexity-train=4.296609261451728	perplexity-val=6.765775958684682	time-elapsed=58068.85897612572	used-gpu-memory=3417
179	converged=False	diverged=False	epoch=143	gradient-norm=0.39533509870900185	learning-rate=7.202999999999997e-05	perplexity-train=4.293905915812617	perplexity-val=6.769396089047606	time-elapsed=58396.01526975632	used-gpu-memory=3417
180	converged=False	diverged=False	epoch=144	gradient-norm=0.44716493284149783	learning-rate=7.202999999999997e-05	perplexity-train=4.3007120423089376	perplexity-val=6.7525642685330824	time-elapsed=58722.07468891144	used-gpu-memory=3417
181	converged=False	diverged=False	epoch=145	gradient-norm=0.45775100994877166	learning-rate=7.202999999999997e-05	perplexity-train=4.305154734428293	perplexity-val=6.73466306574771	time-elapsed=59048.855459690094	used-gpu-memory=3417
182	converged=False	diverged=False	epoch=145	gradient-norm=0.5019547051198893	learning-rate=7.202999999999997e-05	perplexity-train=4.297869186656442	perplexity-val=6.727338155796042	time-elapsed=59375.1196141243	used-gpu-memory=3417
183	converged=False	diverged=False	epoch=146	gradient-norm=0.4810736002924898	learning-rate=7.202999999999997e-05	perplexity-train=4.278746989345699	perplexity-val=6.749825809853956	time-elapsed=59701.14037442207	used-gpu-memory=3417
184	converged=False	diverged=False	epoch=147	gradient-norm=0.48174895464386047	learning-rate=7.202999999999997e-05	perplexity-train=4.289723575142368	perplexity-val=6.764755801648647	time-elapsed=60026.706206321716	used-gpu-memory=3417
185	converged=False	diverged=False	epoch=148	gradient-norm=0.4300565415103943	learning-rate=7.202999999999997e-05	perplexity-train=4.288450686252724	perplexity-val=6.745922978136693	time-elapsed=60352.35164499283	used-gpu-memory=3417
186	converged=False	diverged=False	epoch=149	gradient-norm=0.4353542838345044	learning-rate=7.202999999999997e-05	perplexity-train=4.289045912610118	perplexity-val=6.772875817535537	time-elapsed=60677.942373514175	used-gpu-memory=3417
187	converged=False	diverged=False	epoch=149	gradient-norm=0.4447394274497093	learning-rate=7.202999999999997e-05	perplexity-train=4.284621685273533	perplexity-val=6.755378350414861	time-elapsed=61003.859610795975	used-gpu-memory=3417
188	converged=False	diverged=False	epoch=150	gradient-norm=0.42140213006537197	learning-rate=7.202999999999997e-05	perplexity-train=4.282771770469975	perplexity-val=6.769714623381335	time-elapsed=61329.46802735329	used-gpu-memory=3417
189	converged=False	diverged=False	epoch=151	gradient-norm=0.4345200842037778	learning-rate=7.202999999999997e-05	perplexity-train=4.275522060021662	perplexity-val=6.783542997085138	time-elapsed=61656.42537546158	used-gpu-memory=3417
190	converged=False	diverged=False	epoch=152	gradient-norm=0.3665142220283741	learning-rate=7.202999999999997e-05	perplexity-train=4.277470842395584	perplexity-val=6.769626396293044	time-elapsed=61982.47367763519	used-gpu-memory=3417
191	converged=False	diverged=False	epoch=153	gradient-norm=0.5340878444840974	learning-rate=5.042099999999998e-05	perplexity-train=4.259558799707735	perplexity-val=6.761229922910694	time-elapsed=62309.125626564026	used-gpu-memory=3417
192	converged=False	diverged=False	epoch=153	gradient-norm=0.4267172816109053	learning-rate=5.042099999999998e-05	perplexity-train=4.274594289746947	perplexity-val=6.743337624253539	time-elapsed=62634.27131962776	used-gpu-memory=3417
193	converged=False	diverged=False	epoch=154	gradient-norm=0.4331484514404515	learning-rate=5.042099999999998e-05	perplexity-train=4.263271457429648	perplexity-val=6.768336106607029	time-elapsed=62960.40447330475	used-gpu-memory=3417
194	converged=False	diverged=False	epoch=155	gradient-norm=0.4698408578098666	learning-rate=5.042099999999998e-05	perplexity-train=4.258071823514113	perplexity-val=6.754604739817976	time-elapsed=63285.740827560425	used-gpu-memory=3417
195	converged=False	diverged=False	epoch=156	gradient-norm=0.4349878246363429	learning-rate=5.042099999999998e-05	perplexity-train=4.26423018690781	perplexity-val=6.764101307243784	time-elapsed=63612.36541748047	used-gpu-memory=3417
196	converged=False	diverged=False	epoch=157	gradient-norm=0.4125073619127446	learning-rate=5.042099999999998e-05	perplexity-train=4.2618283066946185	perplexity-val=6.756361104721226	time-elapsed=63938.213527441025	used-gpu-memory=3417
197	converged=False	diverged=False	epoch=157	gradient-norm=0.3979371274995763	learning-rate=5.042099999999998e-05	perplexity-train=4.262094751981613	perplexity-val=6.753692349015688	time-elapsed=64263.309351205826	used-gpu-memory=3417
198	converged=True	diverged=False	epoch=158	gradient-norm=0.4619013647079466	learning-rate=5.042099999999998e-05	perplexity-train=4.249433081952299	perplexity-val=6.755635759689883	time-elapsed=64588.99461364746	used-gpu-memory=3417
