1	converged=False	diverged=False	epoch=1	gradient-norm=0.7062639368374484	learning-rate=0.0006	perplexity-train=68.62176683435665	perplexity-val=34.83030925093378	time-elapsed=515.392578125	used-gpu-memory=2731
2	converged=False	diverged=False	epoch=2	gradient-norm=0.5663463290202136	learning-rate=0.0006	perplexity-train=14.276372554565407	perplexity-val=15.776512219846113	time-elapsed=1035.6836352348328	used-gpu-memory=2731
3	converged=False	diverged=False	epoch=3	gradient-norm=0.5574631111464092	learning-rate=0.0006	perplexity-train=9.294530276512054	perplexity-val=10.60601511442684	time-elapsed=1555.4472217559814	used-gpu-memory=2731
4	converged=False	diverged=False	epoch=4	gradient-norm=0.49020868525651395	learning-rate=0.0006	perplexity-train=7.660503398015293	perplexity-val=8.678689397392539	time-elapsed=2077.893005847931	used-gpu-memory=2731
5	converged=False	diverged=False	epoch=5	gradient-norm=0.5042749155831081	learning-rate=0.0006	perplexity-train=6.768567768690572	perplexity-val=7.738531233890533	time-elapsed=2599.70942401886	used-gpu-memory=2731
6	converged=False	diverged=False	epoch=6	gradient-norm=0.4191530696146336	learning-rate=0.0006	perplexity-train=6.25876292326508	perplexity-val=6.959174569801882	time-elapsed=3121.9715332984924	used-gpu-memory=2731
7	converged=False	diverged=False	epoch=7	gradient-norm=0.4781861278102866	learning-rate=0.0006	perplexity-train=5.8436642232393385	perplexity-val=6.632906914900307	time-elapsed=3645.737297773361	used-gpu-memory=2731
8	converged=False	diverged=False	epoch=8	gradient-norm=0.5114275217395132	learning-rate=0.0006	perplexity-train=5.578035321087581	perplexity-val=6.385640924634896	time-elapsed=4166.376551866531	used-gpu-memory=2731
9	converged=False	diverged=False	epoch=9	gradient-norm=0.44611789593811085	learning-rate=0.0006	perplexity-train=5.413650146302219	perplexity-val=6.1991823647279265	time-elapsed=4688.586256980896	used-gpu-memory=2731
10	converged=False	diverged=False	epoch=10	gradient-norm=0.44160851397657896	learning-rate=0.0006	perplexity-train=5.167768737647633	perplexity-val=5.959129805566059	time-elapsed=5210.242062091827	used-gpu-memory=2731
11	converged=False	diverged=False	epoch=11	gradient-norm=0.3718328066036906	learning-rate=0.0006	perplexity-train=5.132115248166423	perplexity-val=5.806517558331799	time-elapsed=5735.134143590927	used-gpu-memory=2731
12	converged=False	diverged=False	epoch=12	gradient-norm=0.5170215605950981	learning-rate=0.0006	perplexity-train=4.913885426075406	perplexity-val=5.809483864723857	time-elapsed=6253.20085144043	used-gpu-memory=2731
13	converged=False	diverged=False	epoch=13	gradient-norm=0.3822390278138801	learning-rate=0.0006	perplexity-train=4.8653707432269035	perplexity-val=5.61150345125975	time-elapsed=6776.5923528671265	used-gpu-memory=2731
14	converged=False	diverged=False	epoch=14	gradient-norm=0.4501473855841719	learning-rate=0.0006	perplexity-train=4.7619135646952815	perplexity-val=5.519520858191175	time-elapsed=7300.006785869598	used-gpu-memory=2731
15	converged=False	diverged=False	epoch=15	gradient-norm=0.3263432695386053	learning-rate=0.0006	perplexity-train=4.6767853401610715	perplexity-val=5.425379162092972	time-elapsed=7824.610369682312	used-gpu-memory=2731
16	converged=False	diverged=False	epoch=16	gradient-norm=0.36041717147872365	learning-rate=0.0006	perplexity-train=4.574985746574598	perplexity-val=5.402964756988653	time-elapsed=8345.887151956558	used-gpu-memory=2731
17	converged=False	diverged=False	epoch=17	gradient-norm=0.4891758573807338	learning-rate=0.0006	perplexity-train=4.56246129938743	perplexity-val=5.296582976884455	time-elapsed=8871.340514421463	used-gpu-memory=2731
18	converged=False	diverged=False	epoch=18	gradient-norm=0.497419483501438	learning-rate=0.0006	perplexity-train=4.505846193379742	perplexity-val=5.295278597809445	time-elapsed=9392.634059429169	used-gpu-memory=2731
19	converged=False	diverged=False	epoch=19	gradient-norm=0.3620404806380917	learning-rate=0.0006	perplexity-train=4.423874380206743	perplexity-val=5.258735957987847	time-elapsed=9915.948189020157	used-gpu-memory=2731
20	converged=False	diverged=False	epoch=20	gradient-norm=0.34777198722340963	learning-rate=0.0006	perplexity-train=4.396878237112018	perplexity-val=5.196165437035715	time-elapsed=10435.691890716553	used-gpu-memory=2731
21	converged=False	diverged=False	epoch=21	gradient-norm=0.41091906006753376	learning-rate=0.0006	perplexity-train=4.338547525882013	perplexity-val=5.19386714004442	time-elapsed=10957.580180168152	used-gpu-memory=2731
22	converged=False	diverged=False	epoch=23	gradient-norm=0.34463512961881176	learning-rate=0.0006	perplexity-train=4.294957792457787	perplexity-val=5.166602646985	time-elapsed=11480.199525117874	used-gpu-memory=2731
23	converged=False	diverged=False	epoch=24	gradient-norm=0.35047407799717867	learning-rate=0.0006	perplexity-train=4.264657177049467	perplexity-val=5.1619857328258565	time-elapsed=11999.703635931015	used-gpu-memory=2731
24	converged=False	diverged=False	epoch=25	gradient-norm=0.2993759705300531	learning-rate=0.0006	perplexity-train=4.21490626406775	perplexity-val=5.1009780738343	time-elapsed=12516.939425945282	used-gpu-memory=2731
25	converged=False	diverged=False	epoch=26	gradient-norm=0.40676431799107116	learning-rate=0.0006	perplexity-train=4.1910333690083	perplexity-val=5.131672305360443	time-elapsed=13040.318269968033	used-gpu-memory=2731
26	converged=False	diverged=False	epoch=27	gradient-norm=0.3972820546468248	learning-rate=0.0006	perplexity-train=4.159185101032513	perplexity-val=5.104799659783698	time-elapsed=13558.438120603561	used-gpu-memory=2731
27	converged=False	diverged=False	epoch=28	gradient-norm=0.3377125522007017	learning-rate=0.0006	perplexity-train=4.140165223492075	perplexity-val=5.052109486707185	time-elapsed=14078.941056728363	used-gpu-memory=2731
28	converged=False	diverged=False	epoch=29	gradient-norm=0.39040507970166943	learning-rate=0.0006	perplexity-train=4.115875981243476	perplexity-val=5.073340281989634	time-elapsed=14597.496885538101	used-gpu-memory=2731
29	converged=False	diverged=False	epoch=30	gradient-norm=0.3187266632650932	learning-rate=0.0006	perplexity-train=4.0865174074873005	perplexity-val=5.043877770690594	time-elapsed=15121.975445747375	used-gpu-memory=2731
30	converged=False	diverged=False	epoch=31	gradient-norm=0.35555430098429913	learning-rate=0.0006	perplexity-train=4.058040752875113	perplexity-val=5.018492014510616	time-elapsed=15643.540189266205	used-gpu-memory=2731
31	converged=False	diverged=False	epoch=32	gradient-norm=0.3312097493421074	learning-rate=0.0006	perplexity-train=4.029612873512936	perplexity-val=4.986362600376266	time-elapsed=16164.989620923996	used-gpu-memory=2731
32	converged=False	diverged=False	epoch=33	gradient-norm=0.3240940935908297	learning-rate=0.0006	perplexity-train=4.028240059329663	perplexity-val=4.963979968783059	time-elapsed=16680.36886382103	used-gpu-memory=2731
33	converged=False	diverged=False	epoch=34	gradient-norm=0.3286708571924321	learning-rate=0.0006	perplexity-train=3.9915157106340793	perplexity-val=4.968875488140268	time-elapsed=17196.849638462067	used-gpu-memory=2731
34	converged=False	diverged=False	epoch=35	gradient-norm=0.28477650641718577	learning-rate=0.0006	perplexity-train=3.9962180608213873	perplexity-val=4.972621621467123	time-elapsed=17711.441277742386	used-gpu-memory=2731
35	converged=False	diverged=False	epoch=36	gradient-norm=0.3630063302635505	learning-rate=0.0006	perplexity-train=3.9426001092301965	perplexity-val=4.939212782180227	time-elapsed=18228.674296855927	used-gpu-memory=2731
36	converged=False	diverged=False	epoch=37	gradient-norm=0.30015686155063726	learning-rate=0.0006	perplexity-train=3.9479897108782533	perplexity-val=4.986056855097793	time-elapsed=18743.235910892487	used-gpu-memory=2731
37	converged=False	diverged=False	epoch=38	gradient-norm=0.3848811255173557	learning-rate=0.0006	perplexity-train=3.917346815027016	perplexity-val=4.920023046713398	time-elapsed=19258.324808597565	used-gpu-memory=2731
38	converged=False	diverged=False	epoch=39	gradient-norm=0.33568330545027125	learning-rate=0.0006	perplexity-train=3.912740022432905	perplexity-val=4.9095265992824775	time-elapsed=19773.244496822357	used-gpu-memory=2731
39	converged=False	diverged=False	epoch=40	gradient-norm=0.33360670115209323	learning-rate=0.0006	perplexity-train=3.885664505506671	perplexity-val=4.906503732998132	time-elapsed=20288.88442826271	used-gpu-memory=2731
40	converged=False	diverged=False	epoch=41	gradient-norm=0.37546465950968044	learning-rate=0.0006	perplexity-train=3.8826688304578094	perplexity-val=4.9200818719981	time-elapsed=20804.766139745712	used-gpu-memory=2731
41	converged=False	diverged=False	epoch=42	gradient-norm=0.3169552581571588	learning-rate=0.0006	perplexity-train=3.8630861648188874	perplexity-val=4.848950870273107	time-elapsed=21323.745332479477	used-gpu-memory=2731
42	converged=False	diverged=False	epoch=43	gradient-norm=0.3494820577719058	learning-rate=0.0006	perplexity-train=3.85249324283341	perplexity-val=4.838767006352456	time-elapsed=21839.639266729355	used-gpu-memory=2731
43	converged=False	diverged=False	epoch=45	gradient-norm=0.3236886611074686	learning-rate=0.0006	perplexity-train=3.8328567339209942	perplexity-val=4.870864735474538	time-elapsed=22357.443945884705	used-gpu-memory=2731
44	converged=False	diverged=False	epoch=46	gradient-norm=0.33774412175563934	learning-rate=0.0006	perplexity-train=3.804616474786597	perplexity-val=4.871451937025096	time-elapsed=22872.48278069496	used-gpu-memory=2731
45	converged=False	diverged=False	epoch=47	gradient-norm=0.2725368758568297	learning-rate=0.0006	perplexity-train=3.7990849907883484	perplexity-val=4.876301736157301	time-elapsed=23388.067610263824	used-gpu-memory=2731
46	converged=False	diverged=False	epoch=48	gradient-norm=0.33279608310605646	learning-rate=0.0006	perplexity-train=3.8100680806157574	perplexity-val=4.8750061871384816	time-elapsed=23905.33906364441	used-gpu-memory=2731
47	converged=False	diverged=False	epoch=49	gradient-norm=0.4504714455128803	learning-rate=0.0006	perplexity-train=3.788180200033573	perplexity-val=4.892546227059639	time-elapsed=24422.30738735199	used-gpu-memory=2731
48	converged=False	diverged=False	epoch=50	gradient-norm=0.37980725918017033	learning-rate=0.0006	perplexity-train=3.7649800671925506	perplexity-val=4.8502286423913095	time-elapsed=24933.962718248367	used-gpu-memory=2731
49	converged=False	diverged=False	epoch=51	gradient-norm=0.3312393342091255	learning-rate=0.0006	perplexity-train=3.749698608371304	perplexity-val=4.825800102651181	time-elapsed=25449.528962373734	used-gpu-memory=2731
50	converged=False	diverged=False	epoch=52	gradient-norm=0.3408995118979048	learning-rate=0.0006	perplexity-train=3.7489816991063956	perplexity-val=4.8283150663757075	time-elapsed=25966.484731674194	used-gpu-memory=2731
51	converged=False	diverged=False	epoch=53	gradient-norm=0.33715294467083906	learning-rate=0.0006	perplexity-train=3.738625246190172	perplexity-val=4.870136299634908	time-elapsed=26485.54923057556	used-gpu-memory=2731
52	converged=False	diverged=False	epoch=54	gradient-norm=0.3366629941087485	learning-rate=0.0006	perplexity-train=3.7417033552594305	perplexity-val=4.812657266639348	time-elapsed=27001.432163476944	used-gpu-memory=2731
53	converged=False	diverged=False	epoch=55	gradient-norm=0.27806289684511004	learning-rate=0.0006	perplexity-train=3.7080790719023144	perplexity-val=4.850534259566928	time-elapsed=27516.93854689598	used-gpu-memory=2731
54	converged=False	diverged=False	epoch=56	gradient-norm=0.30276248897793956	learning-rate=0.0006	perplexity-train=3.6994611283529206	perplexity-val=4.800803215090228	time-elapsed=28033.41910791397	used-gpu-memory=2731
55	converged=False	diverged=False	epoch=57	gradient-norm=0.30382223698471056	learning-rate=0.0006	perplexity-train=3.7259226129674072	perplexity-val=4.831362934180787	time-elapsed=28551.176724672318	used-gpu-memory=2731
56	converged=False	diverged=False	epoch=58	gradient-norm=0.28629662506352843	learning-rate=0.0006	perplexity-train=3.6952705038338007	perplexity-val=4.824133470711128	time-elapsed=29066.406499147415	used-gpu-memory=2731
57	converged=False	diverged=False	epoch=59	gradient-norm=0.2749891420428748	learning-rate=0.0006	perplexity-train=3.6884928215827917	perplexity-val=4.7899158726680096	time-elapsed=29583.44628572464	used-gpu-memory=2731
58	converged=False	diverged=False	epoch=60	gradient-norm=0.4203040844061371	learning-rate=0.0006	perplexity-train=3.670328947680897	perplexity-val=4.776789014445524	time-elapsed=30099.196966171265	used-gpu-memory=2731
59	converged=False	diverged=False	epoch=61	gradient-norm=0.3649271450156849	learning-rate=0.0006	perplexity-train=3.666117413866751	perplexity-val=4.76346308960017	time-elapsed=30617.802317619324	used-gpu-memory=2731
60	converged=False	diverged=False	epoch=62	gradient-norm=0.3184944694310507	learning-rate=0.0006	perplexity-train=3.6695494158360358	perplexity-val=4.79559734250997	time-elapsed=31135.24393939972	used-gpu-memory=2731
61	converged=False	diverged=False	epoch=63	gradient-norm=0.38801522511297315	learning-rate=0.0006	perplexity-train=3.6585896210108313	perplexity-val=4.78290282023575	time-elapsed=31651.977187395096	used-gpu-memory=2731
62	converged=False	diverged=False	epoch=64	gradient-norm=0.35194833368287254	learning-rate=0.0006	perplexity-train=3.6440409397591775	perplexity-val=4.781733438612136	time-elapsed=32168.33923101425	used-gpu-memory=2731
63	converged=False	diverged=False	epoch=65	gradient-norm=0.3426529103447216	learning-rate=0.0006	perplexity-train=3.6333045319730175	perplexity-val=4.742701434767091	time-elapsed=32684.073941230774	used-gpu-memory=2731
64	converged=False	diverged=False	epoch=67	gradient-norm=0.29518051246633065	learning-rate=0.0006	perplexity-train=3.6313288979535687	perplexity-val=4.757565096920679	time-elapsed=33201.24750208855	used-gpu-memory=2731
65	converged=False	diverged=False	epoch=68	gradient-norm=0.36796391568484	learning-rate=0.0006	perplexity-train=3.6173891655290444	perplexity-val=4.76469395577965	time-elapsed=33717.50636553764	used-gpu-memory=2731
66	converged=False	diverged=False	epoch=69	gradient-norm=0.2730161295725534	learning-rate=0.0006	perplexity-train=3.610652857093762	perplexity-val=4.825572352757071	time-elapsed=34232.69116997719	used-gpu-memory=2731
67	converged=False	diverged=False	epoch=70	gradient-norm=0.2975900602857471	learning-rate=0.0006	perplexity-train=3.6153278645828606	perplexity-val=4.787617920100798	time-elapsed=34748.101838350296	used-gpu-memory=2731
68	converged=False	diverged=False	epoch=71	gradient-norm=0.338907414750359	learning-rate=0.0006	perplexity-train=3.5912075520021807	perplexity-val=4.744535928030277	time-elapsed=35261.45581507683	used-gpu-memory=2731
69	converged=False	diverged=False	epoch=72	gradient-norm=0.32593135920583877	learning-rate=0.0006	perplexity-train=3.61062386536297	perplexity-val=4.79991162589965	time-elapsed=35780.07929110527	used-gpu-memory=2731
70	converged=False	diverged=False	epoch=73	gradient-norm=0.32248923075552055	learning-rate=0.0006	perplexity-train=3.571475480239834	perplexity-val=4.772437536616302	time-elapsed=36296.869089365005	used-gpu-memory=2731
71	converged=False	diverged=False	epoch=74	gradient-norm=0.33788293143881565	learning-rate=0.0006	perplexity-train=3.5877554239507017	perplexity-val=4.768794625251113	time-elapsed=36812.48500061035	used-gpu-memory=2731
72	converged=False	diverged=False	epoch=75	gradient-norm=0.3414077281208628	learning-rate=0.00041999999999999996	perplexity-train=3.508509695730272	perplexity-val=4.7824463052893975	time-elapsed=37330.09362697601	used-gpu-memory=2731
73	converged=False	diverged=False	epoch=76	gradient-norm=0.35803936639850825	learning-rate=0.00041999999999999996	perplexity-train=3.5336881983803394	perplexity-val=4.733513284889022	time-elapsed=37847.640216350555	used-gpu-memory=2731
74	converged=False	diverged=False	epoch=77	gradient-norm=0.34518962112833945	learning-rate=0.00041999999999999996	perplexity-train=3.51332844424952	perplexity-val=4.747018250586022	time-elapsed=38362.69760489464	used-gpu-memory=2731
75	converged=False	diverged=False	epoch=78	gradient-norm=0.3125465803737538	learning-rate=0.00041999999999999996	perplexity-train=3.494161633633372	perplexity-val=4.7036175236662885	time-elapsed=38879.749744176865	used-gpu-memory=2731
76	converged=False	diverged=False	epoch=79	gradient-norm=0.36610997960527936	learning-rate=0.00041999999999999996	perplexity-train=3.480119589133759	perplexity-val=4.725467385762902	time-elapsed=39395.7725789547	used-gpu-memory=2731
77	converged=False	diverged=False	epoch=80	gradient-norm=0.31059526947315425	learning-rate=0.00041999999999999996	perplexity-train=3.4834782185537714	perplexity-val=4.722347909296116	time-elapsed=39913.06742954254	used-gpu-memory=2731
78	converged=False	diverged=False	epoch=81	gradient-norm=0.3634516278630727	learning-rate=0.00041999999999999996	perplexity-train=3.4816447634699697	perplexity-val=4.697762373421002	time-elapsed=40429.81795310974	used-gpu-memory=2731
79	converged=False	diverged=False	epoch=82	gradient-norm=0.3360918442398208	learning-rate=0.00041999999999999996	perplexity-train=3.4877919176702763	perplexity-val=4.726502076200575	time-elapsed=40949.59749650955	used-gpu-memory=2731
80	converged=False	diverged=False	epoch=83	gradient-norm=0.37059886389439567	learning-rate=0.00041999999999999996	perplexity-train=3.476215612797079	perplexity-val=4.72590322167053	time-elapsed=41464.9764752388	used-gpu-memory=2731
81	converged=False	diverged=False	epoch=84	gradient-norm=0.39763979626237206	learning-rate=0.00041999999999999996	perplexity-train=3.4692289103984337	perplexity-val=4.702696986481001	time-elapsed=41982.86287879944	used-gpu-memory=2731
82	converged=False	diverged=False	epoch=85	gradient-norm=0.3137888097096028	learning-rate=0.00041999999999999996	perplexity-train=3.4504663574772816	perplexity-val=4.697062226831377	time-elapsed=42499.05993890762	used-gpu-memory=2731
83	converged=False	diverged=False	epoch=86	gradient-norm=0.30513899566723235	learning-rate=0.00041999999999999996	perplexity-train=3.45563905823806	perplexity-val=4.728184200416004	time-elapsed=43015.28448677063	used-gpu-memory=2731
84	converged=False	diverged=False	epoch=87	gradient-norm=0.35354719521336386	learning-rate=0.00041999999999999996	perplexity-train=3.4523655603560464	perplexity-val=4.708621570742087	time-elapsed=43531.647384405136	used-gpu-memory=2731
85	converged=False	diverged=False	epoch=89	gradient-norm=0.2793774827654148	learning-rate=0.00041999999999999996	perplexity-train=3.450878013243744	perplexity-val=4.723231526929922	time-elapsed=44049.80193185806	used-gpu-memory=2731
86	converged=False	diverged=False	epoch=90	gradient-norm=0.3310229472338008	learning-rate=0.00041999999999999996	perplexity-train=3.441993376606555	perplexity-val=4.724662238491292	time-elapsed=44567.0661239624	used-gpu-memory=2731
87	converged=False	diverged=False	epoch=91	gradient-norm=0.32167419115244983	learning-rate=0.00041999999999999996	perplexity-train=3.4417958879667294	perplexity-val=4.715808896803384	time-elapsed=45084.20357751846	used-gpu-memory=2731
88	converged=False	diverged=False	epoch=92	gradient-norm=0.2904340739366866	learning-rate=0.00041999999999999996	perplexity-train=3.424463165280904	perplexity-val=4.766966181036932	time-elapsed=45600.38441681862	used-gpu-memory=2731
89	converged=False	diverged=False	epoch=93	gradient-norm=0.4026603830803134	learning-rate=0.00041999999999999996	perplexity-train=3.431649731928791	perplexity-val=4.727839728635435	time-elapsed=46116.45508265495	used-gpu-memory=2731
90	converged=False	diverged=False	epoch=94	gradient-norm=0.3575263459388238	learning-rate=0.00041999999999999996	perplexity-train=3.4266334731110146	perplexity-val=4.691866735434424	time-elapsed=46633.81649494171	used-gpu-memory=2731
91	converged=False	diverged=False	epoch=95	gradient-norm=0.31192186427539315	learning-rate=0.00041999999999999996	perplexity-train=3.419278443640627	perplexity-val=4.743033124088935	time-elapsed=47147.49178504944	used-gpu-memory=10755
92	converged=False	diverged=False	epoch=96	gradient-norm=0.33859450185299167	learning-rate=0.00041999999999999996	perplexity-train=3.4191970662721216	perplexity-val=4.759579432625163	time-elapsed=47662.196382045746	used-gpu-memory=10755
93	converged=False	diverged=False	epoch=97	gradient-norm=0.28422523859593196	learning-rate=0.00041999999999999996	perplexity-train=3.40410053789128	perplexity-val=4.7264205443746	time-elapsed=48176.572083711624	used-gpu-memory=0
94	converged=False	diverged=False	epoch=98	gradient-norm=0.49475567944340687	learning-rate=0.00041999999999999996	perplexity-train=3.41682340105419	perplexity-val=4.737935746200311	time-elapsed=48689.26699757576	used-gpu-memory=0
95	converged=False	diverged=False	epoch=99	gradient-norm=0.3910293171615707	learning-rate=0.00041999999999999996	perplexity-train=3.4186277475569815	perplexity-val=4.725069005840005	time-elapsed=49203.595559835434	used-gpu-memory=0
96	converged=False	diverged=False	epoch=100	gradient-norm=0.3110157591695573	learning-rate=0.00041999999999999996	perplexity-train=3.401044553415284	perplexity-val=4.735518360963694	time-elapsed=49715.82297205925	used-gpu-memory=0
97	converged=False	diverged=False	epoch=101	gradient-norm=0.3151367235043195	learning-rate=0.00041999999999999996	perplexity-train=3.405881083849824	perplexity-val=4.7366899232913395	time-elapsed=50230.57719707489	used-gpu-memory=10755
98	converged=False	diverged=False	epoch=102	gradient-norm=0.32650819261126895	learning-rate=0.00041999999999999996	perplexity-train=3.4113960848056717	perplexity-val=4.704558432159922	time-elapsed=50746.46315121651	used-gpu-memory=10755
99	converged=False	diverged=False	epoch=103	gradient-norm=0.3689906616873708	learning-rate=0.00029399999999999994	perplexity-train=3.360165047114537	perplexity-val=4.701681890912019	time-elapsed=51263.02854800224	used-gpu-memory=0
100	converged=False	diverged=False	epoch=104	gradient-norm=0.34454019157907323	learning-rate=0.00029399999999999994	perplexity-train=3.3620539976687276	perplexity-val=4.7061121981718355	time-elapsed=51775.48666834831	used-gpu-memory=0
101	converged=False	diverged=False	epoch=105	gradient-norm=0.38722228921846225	learning-rate=0.00029399999999999994	perplexity-train=3.3365465172743645	perplexity-val=4.6953258760920535	time-elapsed=52289.21225667	used-gpu-memory=0
102	converged=False	diverged=False	epoch=106	gradient-norm=0.36346539242137454	learning-rate=0.00029399999999999994	perplexity-train=3.348045094264936	perplexity-val=4.6907138713626555	time-elapsed=52805.15872836113	used-gpu-memory=0
103	converged=False	diverged=False	epoch=107	gradient-norm=0.30898654633024486	learning-rate=0.00029399999999999994	perplexity-train=3.3328860299662573	perplexity-val=4.6916140628486	time-elapsed=53319.12334728241	used-gpu-memory=10629
104	converged=False	diverged=False	epoch=108	gradient-norm=0.4059929409147306	learning-rate=0.00029399999999999994	perplexity-train=3.3369715544700203	perplexity-val=4.6942422871205	time-elapsed=53832.638525247574	used-gpu-memory=0
105	converged=False	diverged=False	epoch=109	gradient-norm=0.34432181109987764	learning-rate=0.00029399999999999994	perplexity-train=3.3309354869072516	perplexity-val=4.688520440522221	time-elapsed=54348.51060438156	used-gpu-memory=147
106	converged=False	diverged=False	epoch=110	gradient-norm=0.3255604043855041	learning-rate=0.00029399999999999994	perplexity-train=3.338473184742801	perplexity-val=4.699752013990269	time-elapsed=54863.68437290192	used-gpu-memory=10709
107	converged=False	diverged=False	epoch=112	gradient-norm=0.32550648703586366	learning-rate=0.00029399999999999994	perplexity-train=3.3252464197221574	perplexity-val=4.717826205546749	time-elapsed=55379.762439250946	used-gpu-memory=0
108	converged=False	diverged=False	epoch=113	gradient-norm=0.2825764608789427	learning-rate=0.00029399999999999994	perplexity-train=3.313434155470277	perplexity-val=4.7161677424283255	time-elapsed=55895.08412909508	used-gpu-memory=10709
109	converged=False	diverged=False	epoch=114	gradient-norm=0.3816272008356742	learning-rate=0.00029399999999999994	perplexity-train=3.316520816352247	perplexity-val=4.716341703797122	time-elapsed=56410.70711684227	used-gpu-memory=0
110	converged=False	diverged=False	epoch=115	gradient-norm=0.3520286827605364	learning-rate=0.00029399999999999994	perplexity-train=3.3227541536324496	perplexity-val=4.720758768535502	time-elapsed=56926.90707325935	used-gpu-memory=10725
111	converged=False	diverged=False	epoch=116	gradient-norm=0.37697691142600415	learning-rate=0.00029399999999999994	perplexity-train=3.312657649464922	perplexity-val=4.721946596171089	time-elapsed=57441.38577437401	used-gpu-memory=10755
112	converged=False	diverged=False	epoch=117	gradient-norm=0.2985309800865633	learning-rate=0.00029399999999999994	perplexity-train=3.3055565397361906	perplexity-val=4.712841197901964	time-elapsed=57958.41664791107	used-gpu-memory=0
113	converged=False	diverged=False	epoch=118	gradient-norm=0.3016515185457429	learning-rate=0.00029399999999999994	perplexity-train=3.292117675018697	perplexity-val=4.7323073148847605	time-elapsed=58473.3846013546	used-gpu-memory=10629
114	converged=False	diverged=False	epoch=119	gradient-norm=0.340832440547855	learning-rate=0.00020579999999999993	perplexity-train=3.2856421600006143	perplexity-val=4.673388669155239	time-elapsed=58990.50368094444	used-gpu-memory=147
115	converged=False	diverged=False	epoch=120	gradient-norm=0.3437242978142141	learning-rate=0.00020579999999999993	perplexity-train=3.283462211023675	perplexity-val=4.6968486186193275	time-elapsed=59507.13221025467	used-gpu-memory=10739
116	converged=False	diverged=False	epoch=121	gradient-norm=0.4340203583176625	learning-rate=0.00020579999999999993	perplexity-train=3.277412830739287	perplexity-val=4.6800382880669265	time-elapsed=60023.9998292923	used-gpu-memory=0
117	converged=False	diverged=False	epoch=122	gradient-norm=0.3034835012011843	learning-rate=0.00020579999999999993	perplexity-train=3.276675634945868	perplexity-val=4.698253715757305	time-elapsed=60540.61178946495	used-gpu-memory=0
118	converged=False	diverged=False	epoch=123	gradient-norm=0.2919686083047558	learning-rate=0.00020579999999999993	perplexity-train=3.266165513579239	perplexity-val=4.699312353092772	time-elapsed=61056.407183647156	used-gpu-memory=0
119	converged=False	diverged=False	epoch=124	gradient-norm=0.2862753975880504	learning-rate=0.00020579999999999993	perplexity-train=3.260989833896099	perplexity-val=4.682594560020295	time-elapsed=61569.67044687271	used-gpu-memory=0
120	converged=False	diverged=False	epoch=125	gradient-norm=0.36402211490394104	learning-rate=0.00020579999999999993	perplexity-train=3.2661926727405937	perplexity-val=4.6873737605542845	time-elapsed=62082.925141334534	used-gpu-memory=10755
121	converged=False	diverged=False	epoch=126	gradient-norm=0.2735577431965455	learning-rate=0.00020579999999999993	perplexity-train=3.260480221435161	perplexity-val=4.7001177788548985	time-elapsed=62599.000054359436	used-gpu-memory=0
122	converged=False	diverged=False	epoch=127	gradient-norm=0.30654096885199095	learning-rate=0.00020579999999999993	perplexity-train=3.255759282339433	perplexity-val=4.697972718251278	time-elapsed=63113.32467055321	used-gpu-memory=0
123	converged=False	diverged=False	epoch=128	gradient-norm=0.29681922037020514	learning-rate=0.00014405999999999995	perplexity-train=3.2456632909310446	perplexity-val=4.68877814366206	time-elapsed=63628.911660432816	used-gpu-memory=0
124	converged=False	diverged=False	epoch=129	gradient-norm=0.2985407984368275	learning-rate=0.00014405999999999995	perplexity-train=3.2525576233099383	perplexity-val=4.676112542655598	time-elapsed=64146.761221170425	used-gpu-memory=0
125	converged=False	diverged=False	epoch=130	gradient-norm=0.3533685719781016	learning-rate=0.00014405999999999995	perplexity-train=3.249885998911889	perplexity-val=4.694452779606527	time-elapsed=64662.13739514351	used-gpu-memory=0
126	converged=False	diverged=False	epoch=131	gradient-norm=0.3412655499753283	learning-rate=0.00014405999999999995	perplexity-train=3.242420332582153	perplexity-val=4.696824920451717	time-elapsed=65178.53571271896	used-gpu-memory=10657
127	converged=False	diverged=False	epoch=132	gradient-norm=0.31414876226230554	learning-rate=0.00014405999999999995	perplexity-train=3.236866120275482	perplexity-val=4.700364407321447	time-elapsed=65694.88331341743	used-gpu-memory=0
128	converged=False	diverged=False	epoch=134	gradient-norm=0.39038454457627986	learning-rate=0.00014405999999999995	perplexity-train=3.233227835507205	perplexity-val=4.686914200114034	time-elapsed=66211.57599067688	used-gpu-memory=0
129	converged=False	diverged=False	epoch=135	gradient-norm=0.3302086306166268	learning-rate=0.00014405999999999995	perplexity-train=3.223257409062942	perplexity-val=4.684319224310783	time-elapsed=66725.86477899551	used-gpu-memory=10755
130	converged=True	diverged=False	epoch=136	gradient-norm=0.29571413791106177	learning-rate=0.00014405999999999995	perplexity-train=3.2137925612587783	perplexity-val=4.702258606765096	time-elapsed=67241.93606448174	used-gpu-memory=0
