1	converged=False	diverged=False	epoch=1	gradient-norm=0.5780079498075056	learning-rate=0.001	perplexity-train=62.53786783426844	perplexity-val=36.77929929227928	time-elapsed=320.60873079299927	used-gpu-memory=4209
2	converged=False	diverged=False	epoch=2	gradient-norm=0.4760166876035244	learning-rate=0.001	perplexity-train=20.13087531879287	perplexity-val=22.513375771456595	time-elapsed=644.8306789398193	used-gpu-memory=4239
3	converged=False	diverged=False	epoch=3	gradient-norm=0.4020296921053001	learning-rate=0.001	perplexity-train=14.123768322061549	perplexity-val=17.73974562993296	time-elapsed=969.2039518356323	used-gpu-memory=4239
4	converged=False	diverged=False	epoch=4	gradient-norm=0.39739519720616284	learning-rate=0.001	perplexity-train=11.395447789453188	perplexity-val=14.886484272814554	time-elapsed=1293.7277421951294	used-gpu-memory=4239
5	converged=False	diverged=False	epoch=5	gradient-norm=0.3868439348025256	learning-rate=0.001	perplexity-train=9.93165836998627	perplexity-val=13.222665136451017	time-elapsed=1617.1883392333984	used-gpu-memory=4239
6	converged=False	diverged=False	epoch=6	gradient-norm=0.38827290679052207	learning-rate=0.001	perplexity-train=8.963624858239804	perplexity-val=12.362374361098617	time-elapsed=1941.1320316791534	used-gpu-memory=4239
7	converged=False	diverged=False	epoch=7	gradient-norm=0.34376016097730716	learning-rate=0.001	perplexity-train=8.31240981385674	perplexity-val=11.751256545111001	time-elapsed=2265.003586292267	used-gpu-memory=4239
8	converged=False	diverged=False	epoch=8	gradient-norm=0.3515930220965875	learning-rate=0.001	perplexity-train=7.880043839693928	perplexity-val=11.283751886321266	time-elapsed=2590.3546109199524	used-gpu-memory=4239
9	converged=False	diverged=False	epoch=9	gradient-norm=0.34110308446420806	learning-rate=0.001	perplexity-train=7.479565899932686	perplexity-val=10.714868601236411	time-elapsed=2913.9946472644806	used-gpu-memory=4239
10	converged=False	diverged=False	epoch=10	gradient-norm=0.37261844141483497	learning-rate=0.001	perplexity-train=7.190538974415116	perplexity-val=10.428600487070002	time-elapsed=3239.4163389205933	used-gpu-memory=4239
11	converged=False	diverged=False	epoch=11	gradient-norm=0.3269823209776078	learning-rate=0.001	perplexity-train=6.921422515880977	perplexity-val=10.096005307215771	time-elapsed=3565.2645523548126	used-gpu-memory=4239
12	converged=False	diverged=False	epoch=12	gradient-norm=0.37227611408718275	learning-rate=0.001	perplexity-train=6.747852838816393	perplexity-val=9.913274900663756	time-elapsed=3888.2708916664124	used-gpu-memory=4239
13	converged=False	diverged=False	epoch=13	gradient-norm=0.32827195566016487	learning-rate=0.001	perplexity-train=6.552190738978784	perplexity-val=9.692277992422778	time-elapsed=4211.183361530304	used-gpu-memory=4239
14	converged=False	diverged=False	epoch=14	gradient-norm=0.37302153326767234	learning-rate=0.001	perplexity-train=6.42121726244833	perplexity-val=9.543563410050588	time-elapsed=4532.83976817131	used-gpu-memory=4239
15	converged=False	diverged=False	epoch=15	gradient-norm=0.4203876311855425	learning-rate=0.001	perplexity-train=6.268237415080598	perplexity-val=9.343795292959111	time-elapsed=4857.685341119766	used-gpu-memory=4239
16	converged=False	diverged=False	epoch=16	gradient-norm=0.3982956267461982	learning-rate=0.001	perplexity-train=6.149763226904062	perplexity-val=9.249337945408552	time-elapsed=5182.823632717133	used-gpu-memory=4239
17	converged=False	diverged=False	epoch=17	gradient-norm=0.3039455503456293	learning-rate=0.001	perplexity-train=6.057638810498205	perplexity-val=9.09831855239522	time-elapsed=5508.253728866577	used-gpu-memory=4239
18	converged=False	diverged=False	epoch=18	gradient-norm=0.29716095948254434	learning-rate=0.001	perplexity-train=5.960917835858429	perplexity-val=8.94869565847842	time-elapsed=5834.2207798957825	used-gpu-memory=4239
19	converged=False	diverged=False	epoch=19	gradient-norm=0.3359398326808331	learning-rate=0.001	perplexity-train=5.864674630550932	perplexity-val=8.866965379782643	time-elapsed=6159.567941188812	used-gpu-memory=4239
20	converged=False	diverged=False	epoch=20	gradient-norm=0.3737572068828773	learning-rate=0.001	perplexity-train=5.8022591252740305	perplexity-val=8.833609048209079	time-elapsed=6488.003424167633	used-gpu-memory=4239
21	converged=False	diverged=False	epoch=21	gradient-norm=0.39873609996981485	learning-rate=0.001	perplexity-train=5.711121498591776	perplexity-val=8.757117887850242	time-elapsed=6812.9146201610565	used-gpu-memory=4239
22	converged=False	diverged=False	epoch=22	gradient-norm=0.32682352976235945	learning-rate=0.001	perplexity-train=5.6636820079318735	perplexity-val=8.664909913381674	time-elapsed=7138.606023073196	used-gpu-memory=4239
23	converged=False	diverged=False	epoch=23	gradient-norm=0.34699592216299907	learning-rate=0.001	perplexity-train=5.600830379805445	perplexity-val=8.514867798800616	time-elapsed=7464.1752161979675	used-gpu-memory=4239
24	converged=False	diverged=False	epoch=24	gradient-norm=0.4323369046996122	learning-rate=0.001	perplexity-train=5.519370653948661	perplexity-val=8.528169120793835	time-elapsed=7786.8266685009	used-gpu-memory=4239
25	converged=False	diverged=False	epoch=25	gradient-norm=0.3424267678206347	learning-rate=0.001	perplexity-train=5.48387843465685	perplexity-val=8.441944500644476	time-elapsed=8112.920539617538	used-gpu-memory=4239
26	converged=False	diverged=False	epoch=26	gradient-norm=0.33497091141323937	learning-rate=0.001	perplexity-train=5.434022987569056	perplexity-val=8.379327385219403	time-elapsed=8437.60152554512	used-gpu-memory=4239
27	converged=False	diverged=False	epoch=27	gradient-norm=0.35437288110087084	learning-rate=0.001	perplexity-train=5.392107126347906	perplexity-val=8.294392532896708	time-elapsed=8763.50366306305	used-gpu-memory=4239
28	converged=False	diverged=False	epoch=28	gradient-norm=0.4341281152489876	learning-rate=0.001	perplexity-train=5.331172523129274	perplexity-val=8.255114827834095	time-elapsed=9088.445625782013	used-gpu-memory=4239
29	converged=False	diverged=False	epoch=29	gradient-norm=0.3585146271011679	learning-rate=0.001	perplexity-train=5.315289840424674	perplexity-val=8.14407077454299	time-elapsed=9414.258186340332	used-gpu-memory=4239
30	converged=False	diverged=False	epoch=30	gradient-norm=0.5413700637558796	learning-rate=0.001	perplexity-train=5.270825662613332	perplexity-val=8.238180908971238	time-elapsed=9738.936111450195	used-gpu-memory=4239
31	converged=False	diverged=False	epoch=31	gradient-norm=0.36097049241957213	learning-rate=0.001	perplexity-train=5.214954761335909	perplexity-val=8.14194282458938	time-elapsed=10064.783088207245	used-gpu-memory=4239
32	converged=False	diverged=False	epoch=32	gradient-norm=0.4749640856119447	learning-rate=0.001	perplexity-train=5.196854581230882	perplexity-val=8.0592093176651	time-elapsed=10390.215383529663	used-gpu-memory=4239
33	converged=False	diverged=False	epoch=33	gradient-norm=0.3840366555124968	learning-rate=0.001	perplexity-train=5.171096864430241	perplexity-val=8.021595611187422	time-elapsed=10715.048932313919	used-gpu-memory=4239
34	converged=False	diverged=False	epoch=34	gradient-norm=0.48282732675421985	learning-rate=0.001	perplexity-train=5.123984362937571	perplexity-val=7.9906875769775105	time-elapsed=11039.35552930832	used-gpu-memory=4239
35	converged=False	diverged=False	epoch=35	gradient-norm=0.4081932831182111	learning-rate=0.001	perplexity-train=5.102020335254016	perplexity-val=8.010304014374986	time-elapsed=11363.756172418594	used-gpu-memory=4239
36	converged=False	diverged=False	epoch=36	gradient-norm=0.3093476830479586	learning-rate=0.001	perplexity-train=5.071586205978723	perplexity-val=7.95135733434709	time-elapsed=11689.243587493896	used-gpu-memory=4239
37	converged=False	diverged=False	epoch=37	gradient-norm=0.4004633090248023	learning-rate=0.001	perplexity-train=5.046030300344705	perplexity-val=7.9717690045915806	time-elapsed=12014.20673942566	used-gpu-memory=4239
38	converged=False	diverged=False	epoch=38	gradient-norm=0.3184507204878581	learning-rate=0.001	perplexity-train=5.0248977298007285	perplexity-val=7.920756571524985	time-elapsed=12336.079943418503	used-gpu-memory=4239
39	converged=False	diverged=False	epoch=39	gradient-norm=0.3245207486587505	learning-rate=0.001	perplexity-train=5.001521961645722	perplexity-val=7.881464163982778	time-elapsed=12660.632343769073	used-gpu-memory=4239
40	converged=False	diverged=False	epoch=40	gradient-norm=0.3267072026418846	learning-rate=0.001	perplexity-train=4.973585481615058	perplexity-val=7.839946461792282	time-elapsed=12985.96344590187	used-gpu-memory=4239
41	converged=False	diverged=False	epoch=42	gradient-norm=0.3890926321908003	learning-rate=0.001	perplexity-train=4.946471491511485	perplexity-val=7.81366816393746	time-elapsed=13311.781359434128	used-gpu-memory=4239
42	converged=False	diverged=False	epoch=43	gradient-norm=0.3668886307313311	learning-rate=0.001	perplexity-train=4.925835477781904	perplexity-val=7.850880432836411	time-elapsed=13635.847841262817	used-gpu-memory=4239
43	converged=False	diverged=False	epoch=44	gradient-norm=0.3683329734634482	learning-rate=0.001	perplexity-train=4.901038192240331	perplexity-val=7.827051189984829	time-elapsed=13960.228835344315	used-gpu-memory=4239
44	converged=False	diverged=False	epoch=45	gradient-norm=0.43092085705977085	learning-rate=0.001	perplexity-train=4.891478804474877	perplexity-val=7.808303800592408	time-elapsed=14286.400990247726	used-gpu-memory=4239
45	converged=False	diverged=False	epoch=46	gradient-norm=0.4190989494922846	learning-rate=0.001	perplexity-train=4.860148893696885	perplexity-val=7.809884638609972	time-elapsed=14610.48812365532	used-gpu-memory=4239
46	converged=False	diverged=False	epoch=47	gradient-norm=0.5976506259620858	learning-rate=0.001	perplexity-train=4.848113643442551	perplexity-val=7.811838234628525	time-elapsed=14934.59349822998	used-gpu-memory=4239
47	converged=False	diverged=False	epoch=48	gradient-norm=0.3783522330086965	learning-rate=0.001	perplexity-train=4.832178739287032	perplexity-val=7.719252184514323	time-elapsed=15260.367285013199	used-gpu-memory=4239
48	converged=False	diverged=False	epoch=49	gradient-norm=0.44518781655161044	learning-rate=0.001	perplexity-train=4.808219909338076	perplexity-val=7.8193891904134	time-elapsed=15576.094910144806	used-gpu-memory=4239
49	converged=False	diverged=False	epoch=50	gradient-norm=0.4990310057783292	learning-rate=0.001	perplexity-train=4.803176411986756	perplexity-val=7.734608804505222	time-elapsed=15902.234076976776	used-gpu-memory=4239
50	converged=False	diverged=False	epoch=51	gradient-norm=0.3625086073147398	learning-rate=0.001	perplexity-train=4.77386608918676	perplexity-val=7.68109587907939	time-elapsed=16228.14335489273	used-gpu-memory=4239
51	converged=False	diverged=False	epoch=52	gradient-norm=0.3308908228967559	learning-rate=0.001	perplexity-train=4.7719326405375915	perplexity-val=7.748211946631152	time-elapsed=16553.027514219284	used-gpu-memory=4239
52	converged=False	diverged=False	epoch=53	gradient-norm=0.45109170510011265	learning-rate=0.001	perplexity-train=4.747674055947553	perplexity-val=7.698515579848127	time-elapsed=16877.830428361893	used-gpu-memory=4239
53	converged=False	diverged=False	epoch=54	gradient-norm=0.37347101963286805	learning-rate=0.001	perplexity-train=4.7362182925831595	perplexity-val=7.62275407311503	time-elapsed=17194.11877846718	used-gpu-memory=4239
54	converged=False	diverged=False	epoch=55	gradient-norm=0.4060737709327325	learning-rate=0.001	perplexity-train=4.724677596408959	perplexity-val=7.613635712566897	time-elapsed=17520.055748701096	used-gpu-memory=4239
55	converged=False	diverged=False	epoch=56	gradient-norm=0.3659724726749136	learning-rate=0.001	perplexity-train=4.69789690591336	perplexity-val=7.577931680587456	time-elapsed=17844.75591468811	used-gpu-memory=4239
56	converged=False	diverged=False	epoch=57	gradient-norm=0.3743145934774077	learning-rate=0.001	perplexity-train=4.706057405538465	perplexity-val=7.6059843056053085	time-elapsed=18163.132046222687	used-gpu-memory=4239
57	converged=False	diverged=False	epoch=58	gradient-norm=0.42917082174992655	learning-rate=0.001	perplexity-train=4.679322538539881	perplexity-val=7.578732213894746	time-elapsed=18487.676813840866	used-gpu-memory=4239
58	converged=False	diverged=False	epoch=59	gradient-norm=0.4256588111788625	learning-rate=0.001	perplexity-train=4.674513760048804	perplexity-val=7.584196964573949	time-elapsed=18812.594866514206	used-gpu-memory=4239
59	converged=False	diverged=False	epoch=60	gradient-norm=0.3975846319787782	learning-rate=0.001	perplexity-train=4.66114351098134	perplexity-val=7.583142487551947	time-elapsed=19136.51792740822	used-gpu-memory=4239
60	converged=False	diverged=False	epoch=61	gradient-norm=0.41856375204506957	learning-rate=0.001	perplexity-train=4.64971606751632	perplexity-val=7.586665618646134	time-elapsed=19461.409967899323	used-gpu-memory=4239
61	converged=False	diverged=False	epoch=62	gradient-norm=0.42862072951513386	learning-rate=0.001	perplexity-train=4.632970383271875	perplexity-val=7.591510841057527	time-elapsed=19784.426288843155	used-gpu-memory=4239
62	converged=False	diverged=False	epoch=63	gradient-norm=0.4464030869068128	learning-rate=0.001	perplexity-train=4.629652583453093	perplexity-val=7.628791975431765	time-elapsed=20109.637773513794	used-gpu-memory=4239
63	converged=False	diverged=False	epoch=64	gradient-norm=0.3802948702446247	learning-rate=0.001	perplexity-train=4.618952028237974	perplexity-val=7.552954933622096	time-elapsed=20432.799023866653	used-gpu-memory=4239
64	converged=False	diverged=False	epoch=65	gradient-norm=0.3942100329670477	learning-rate=0.001	perplexity-train=4.601824683408198	perplexity-val=7.584476057729137	time-elapsed=20757.91491484642	used-gpu-memory=4239
65	converged=False	diverged=False	epoch=66	gradient-norm=0.5297184041943791	learning-rate=0.001	perplexity-train=4.593472058516421	perplexity-val=7.448187057055557	time-elapsed=21083.53285598755	used-gpu-memory=4239
66	converged=False	diverged=False	epoch=67	gradient-norm=0.7233512815504459	learning-rate=0.001	perplexity-train=4.579036305403781	perplexity-val=7.536055043510983	time-elapsed=21402.19121837616	used-gpu-memory=4239
67	converged=False	diverged=False	epoch=68	gradient-norm=0.403063270496124	learning-rate=0.001	perplexity-train=4.583022764304946	perplexity-val=7.470047873348484	time-elapsed=21727.70580482483	used-gpu-memory=4239
68	converged=False	diverged=False	epoch=69	gradient-norm=0.41184750988956675	learning-rate=0.001	perplexity-train=4.554821844566363	perplexity-val=7.460554490201604	time-elapsed=22052.01130080223	used-gpu-memory=4239
69	converged=False	diverged=False	epoch=70	gradient-norm=0.4409230357287089	learning-rate=0.001	perplexity-train=4.563952723797002	perplexity-val=7.456630929267123	time-elapsed=22376.884914398193	used-gpu-memory=4239
70	converged=False	diverged=False	epoch=71	gradient-norm=0.4085806919487986	learning-rate=0.001	perplexity-train=4.541592281056154	perplexity-val=7.510214697499688	time-elapsed=22701.471471309662	used-gpu-memory=4239
71	converged=False	diverged=False	epoch=72	gradient-norm=0.4156369295321342	learning-rate=0.001	perplexity-train=4.542308882346197	perplexity-val=7.49123896043889	time-elapsed=23026.329588890076	used-gpu-memory=4239
72	converged=False	diverged=False	epoch=73	gradient-norm=0.46035571854031104	learning-rate=0.001	perplexity-train=4.521719914269877	perplexity-val=7.451711791511793	time-elapsed=23346.720771551132	used-gpu-memory=4239
73	converged=False	diverged=False	epoch=74	gradient-norm=0.4663797580347473	learning-rate=0.001	perplexity-train=4.527513850078945	perplexity-val=7.4456399220517815	time-elapsed=23672.465357780457	used-gpu-memory=4239
74	converged=False	diverged=False	epoch=75	gradient-norm=0.3797805452333478	learning-rate=0.001	perplexity-train=4.506467745831742	perplexity-val=7.45665994101188	time-elapsed=23997.15577316284	used-gpu-memory=4239
75	converged=False	diverged=False	epoch=76	gradient-norm=0.5530677494875144	learning-rate=0.001	perplexity-train=4.506537781186364	perplexity-val=7.48619800098825	time-elapsed=24321.183546066284	used-gpu-memory=4239
76	converged=False	diverged=False	epoch=77	gradient-norm=0.42604895006844606	learning-rate=0.001	perplexity-train=4.498677187585301	perplexity-val=7.4155622160340755	time-elapsed=24646.163827896118	used-gpu-memory=4239
77	converged=False	diverged=False	epoch=78	gradient-norm=0.448404104839677	learning-rate=0.001	perplexity-train=4.486400330076606	perplexity-val=7.376489209251992	time-elapsed=24971.437654733658	used-gpu-memory=4239
78	converged=False	diverged=False	epoch=79	gradient-norm=0.4420705848044655	learning-rate=0.001	perplexity-train=4.481458432559655	perplexity-val=7.379059701083412	time-elapsed=25296.067359685898	used-gpu-memory=4239
79	converged=False	diverged=False	epoch=80	gradient-norm=0.4862738513794142	learning-rate=0.001	perplexity-train=4.463280686051704	perplexity-val=7.4401065172744625	time-elapsed=25620.520972013474	used-gpu-memory=4239
80	converged=False	diverged=False	epoch=81	gradient-norm=0.4879242873126719	learning-rate=0.001	perplexity-train=4.467233184808446	perplexity-val=7.428074409913915	time-elapsed=25945.13539648056	used-gpu-memory=4239
81	converged=False	diverged=False	epoch=82	gradient-norm=0.45891185474641466	learning-rate=0.001	perplexity-train=4.45981723966125	perplexity-val=7.4077737735615505	time-elapsed=26270.773432970047	used-gpu-memory=4239
82	converged=False	diverged=False	epoch=84	gradient-norm=0.8121866935330722	learning-rate=0.001	perplexity-train=4.448336853892411	perplexity-val=7.39894136024991	time-elapsed=26596.620414972305	used-gpu-memory=4239
83	converged=False	diverged=False	epoch=85	gradient-norm=0.4866800202175815	learning-rate=0.001	perplexity-train=4.4353183333242265	perplexity-val=7.473911388560725	time-elapsed=26921.31671476364	used-gpu-memory=4239
84	converged=False	diverged=False	epoch=86	gradient-norm=0.4628578169247098	learning-rate=0.001	perplexity-train=4.435018715753167	perplexity-val=7.438466446818479	time-elapsed=27247.081559419632	used-gpu-memory=4239
85	converged=False	diverged=False	epoch=87	gradient-norm=0.5109243381385946	learning-rate=0.001	perplexity-train=4.429740778912982	perplexity-val=7.406140998340902	time-elapsed=27571.965205430984	used-gpu-memory=4239
86	converged=False	diverged=False	epoch=88	gradient-norm=0.49348166288588435	learning-rate=0.0007	perplexity-train=4.332073452170774	perplexity-val=7.337963673939998	time-elapsed=27897.62094283104	used-gpu-memory=4289
87	converged=False	diverged=False	epoch=89	gradient-norm=0.45418473980452717	learning-rate=0.0007	perplexity-train=4.318281777181586	perplexity-val=7.337396156754366	time-elapsed=28223.701897144318	used-gpu-memory=4289
88	converged=False	diverged=False	epoch=90	gradient-norm=0.39253470506336724	learning-rate=0.0007	perplexity-train=4.300873152770595	perplexity-val=7.408179067384494	time-elapsed=28547.59708213806	used-gpu-memory=4289
89	converged=False	diverged=False	epoch=91	gradient-norm=0.4577418154118039	learning-rate=0.0007	perplexity-train=4.2959495511612324	perplexity-val=7.313411190728557	time-elapsed=28873.72426509857	used-gpu-memory=4289
90	converged=False	diverged=False	epoch=92	gradient-norm=0.6403638736000715	learning-rate=0.0007	perplexity-train=4.282756607200187	perplexity-val=7.300883730004353	time-elapsed=29199.058387041092	used-gpu-memory=4289
91	converged=False	diverged=False	epoch=93	gradient-norm=0.5153490374681695	learning-rate=0.0007	perplexity-train=4.275925210959037	perplexity-val=7.33886265428833	time-elapsed=29523.523907661438	used-gpu-memory=4289
92	converged=False	diverged=False	epoch=94	gradient-norm=0.42385176019473575	learning-rate=0.0007	perplexity-train=4.262415591193547	perplexity-val=7.330033528096344	time-elapsed=29847.603485107422	used-gpu-memory=4289
93	converged=False	diverged=False	epoch=95	gradient-norm=0.4814865757304481	learning-rate=0.0007	perplexity-train=4.261810294024234	perplexity-val=7.302404289540316	time-elapsed=30173.163905382156	used-gpu-memory=4289
94	converged=False	diverged=False	epoch=96	gradient-norm=0.4376434193321257	learning-rate=0.0007	perplexity-train=4.260214669916711	perplexity-val=7.311007150910986	time-elapsed=30497.564751386642	used-gpu-memory=4289
95	converged=False	diverged=False	epoch=97	gradient-norm=0.39696912662928807	learning-rate=0.0007	perplexity-train=4.249160912333824	perplexity-val=7.285944219439756	time-elapsed=30823.87240767479	used-gpu-memory=4289
96	converged=False	diverged=False	epoch=98	gradient-norm=0.455150303926949	learning-rate=0.0007	perplexity-train=4.251770433995909	perplexity-val=7.299111931387814	time-elapsed=31146.010119199753	used-gpu-memory=4289
97	converged=False	diverged=False	epoch=99	gradient-norm=0.4093376429649415	learning-rate=0.0007	perplexity-train=4.231124858090971	perplexity-val=7.34754141865113	time-elapsed=31470.586538791656	used-gpu-memory=4289
98	converged=False	diverged=False	epoch=100	gradient-norm=0.5067533618619541	learning-rate=0.0007	perplexity-train=4.238199497464869	perplexity-val=7.294296080416617	time-elapsed=31795.61737895012	used-gpu-memory=4289
99	converged=False	diverged=False	epoch=101	gradient-norm=0.4526626090173685	learning-rate=0.0007	perplexity-train=4.23331276023394	perplexity-val=7.291997523187434	time-elapsed=32121.369778633118	used-gpu-memory=4289
100	converged=False	diverged=False	epoch=102	gradient-norm=0.4642090426326814	learning-rate=0.0007	perplexity-train=4.22337558639263	perplexity-val=7.289657642929325	time-elapsed=32445.905000448227	used-gpu-memory=4289
101	converged=False	diverged=False	epoch=103	gradient-norm=0.43328671404524133	learning-rate=0.0007	perplexity-train=4.224648053627132	perplexity-val=7.255190090027635	time-elapsed=32771.78729867935	used-gpu-memory=4289
102	converged=False	diverged=False	epoch=104	gradient-norm=0.5220916330358959	learning-rate=0.0007	perplexity-train=4.225874900740053	perplexity-val=7.314293663861114	time-elapsed=33094.53458118439	used-gpu-memory=4289
103	converged=False	diverged=False	epoch=105	gradient-norm=0.45066089938135784	learning-rate=0.0007	perplexity-train=4.207287210509787	perplexity-val=7.319335399751467	time-elapsed=33419.39771795273	used-gpu-memory=4291
104	converged=False	diverged=False	epoch=106	gradient-norm=0.45973809571998714	learning-rate=0.0007	perplexity-train=4.209144452889959	perplexity-val=7.280527977658813	time-elapsed=33744.75179004669	used-gpu-memory=4291
105	converged=False	diverged=False	epoch=107	gradient-norm=0.48815352782312954	learning-rate=0.0007	perplexity-train=4.203814676235321	perplexity-val=7.249156854940868	time-elapsed=34070.90604567528	used-gpu-memory=4291
106	converged=False	diverged=False	epoch=108	gradient-norm=0.5292062111090254	learning-rate=0.0007	perplexity-train=4.207891029056646	perplexity-val=7.278408881328204	time-elapsed=34396.58675312996	used-gpu-memory=4291
107	converged=False	diverged=False	epoch=109	gradient-norm=0.48915038994159	learning-rate=0.0007	perplexity-train=4.193394096548721	perplexity-val=7.295969242103887	time-elapsed=34722.14060306549	used-gpu-memory=4291
108	converged=False	diverged=False	epoch=110	gradient-norm=0.5784235268318803	learning-rate=0.0007	perplexity-train=4.186116020452601	perplexity-val=7.245281585301612	time-elapsed=35047.03145670891	used-gpu-memory=4291
109	converged=False	diverged=False	epoch=111	gradient-norm=0.4679836687978757	learning-rate=0.0007	perplexity-train=4.199224829947714	perplexity-val=7.2255978796606035	time-elapsed=35372.31702494621	used-gpu-memory=4291
110	converged=False	diverged=False	epoch=112	gradient-norm=0.5174083828496635	learning-rate=0.0007	perplexity-train=4.19375714346592	perplexity-val=7.266813114003594	time-elapsed=35697.54604768753	used-gpu-memory=4291
111	converged=False	diverged=False	epoch=113	gradient-norm=0.45249553492164307	learning-rate=0.0007	perplexity-train=4.180292222213423	perplexity-val=7.333353135474314	time-elapsed=36017.31479263306	used-gpu-memory=4291
112	converged=False	diverged=False	epoch=114	gradient-norm=0.5028998930398422	learning-rate=0.0007	perplexity-train=4.181759237400669	perplexity-val=7.316776522271579	time-elapsed=36338.4081902504	used-gpu-memory=4291
113	converged=False	diverged=False	epoch=115	gradient-norm=0.4932298213841886	learning-rate=0.0007	perplexity-train=4.1802419739031045	perplexity-val=7.271369717095016	time-elapsed=36663.860902786255	used-gpu-memory=4291
114	converged=False	diverged=False	epoch=116	gradient-norm=0.5533957263179982	learning-rate=0.0007	perplexity-train=4.172002206909519	perplexity-val=7.23704764014279	time-elapsed=36989.24099302292	used-gpu-memory=4291
115	converged=False	diverged=False	epoch=117	gradient-norm=0.5316624292830463	learning-rate=0.0007	perplexity-train=4.168858350451794	perplexity-val=7.317542366081585	time-elapsed=37314.01313185692	used-gpu-memory=4291
116	converged=False	diverged=False	epoch=118	gradient-norm=0.49878241804901935	learning-rate=0.0007	perplexity-train=4.172145046217198	perplexity-val=7.282570315660045	time-elapsed=37639.42980122566	used-gpu-memory=4291
117	converged=False	diverged=False	epoch=119	gradient-norm=0.4549464529243083	learning-rate=0.0007	perplexity-train=4.162040493523093	perplexity-val=7.217239093924372	time-elapsed=37964.52473306656	used-gpu-memory=4291
118	converged=False	diverged=False	epoch=120	gradient-norm=0.500209828338736	learning-rate=0.0007	perplexity-train=4.165050315793724	perplexity-val=7.220506604224149	time-elapsed=38289.826602220535	used-gpu-memory=4291
119	converged=False	diverged=False	epoch=121	gradient-norm=0.4810786319378408	learning-rate=0.0007	perplexity-train=4.1606032586082735	perplexity-val=7.245968972399646	time-elapsed=38614.46607398987	used-gpu-memory=4291
120	converged=False	diverged=False	epoch=122	gradient-norm=0.440745388635198	learning-rate=0.0007	perplexity-train=4.155396317349591	perplexity-val=7.264530497315686	time-elapsed=38939.85646224022	used-gpu-memory=4291
121	converged=False	diverged=False	epoch=123	gradient-norm=0.5073183891282287	learning-rate=0.0007	perplexity-train=4.149200428006402	perplexity-val=7.215961717835534	time-elapsed=39264.9425573349	used-gpu-memory=4291
122	converged=False	diverged=False	epoch=125	gradient-norm=0.5546173597676034	learning-rate=0.0007	perplexity-train=4.1507314709891086	perplexity-val=7.199793623234354	time-elapsed=39590.212678194046	used-gpu-memory=4291
123	converged=False	diverged=False	epoch=126	gradient-norm=0.5316352431185861	learning-rate=0.0007	perplexity-train=4.142011924482205	perplexity-val=7.1926427170061356	time-elapsed=39915.75292348862	used-gpu-memory=4291
124	converged=False	diverged=False	epoch=127	gradient-norm=0.6341832860295119	learning-rate=0.0007	perplexity-train=4.139523900936656	perplexity-val=7.224176160492579	time-elapsed=40240.8657913208	used-gpu-memory=4291
125	converged=False	diverged=False	epoch=128	gradient-norm=0.5015767019976168	learning-rate=0.0007	perplexity-train=4.136519066103321	perplexity-val=7.275011745331535	time-elapsed=40565.28941655159	used-gpu-memory=4291
126	converged=False	diverged=False	epoch=129	gradient-norm=0.4558250465941615	learning-rate=0.0007	perplexity-train=4.1340054789695095	perplexity-val=7.262463216706521	time-elapsed=40890.68132328987	used-gpu-memory=4291
127	converged=False	diverged=False	epoch=130	gradient-norm=0.5766980424356186	learning-rate=0.0007	perplexity-train=4.129218086757308	perplexity-val=7.280888472920038	time-elapsed=41215.58781552315	used-gpu-memory=4291
128	converged=False	diverged=False	epoch=131	gradient-norm=0.46736630584778766	learning-rate=0.0007	perplexity-train=4.127315250089768	perplexity-val=7.3168251627985725	time-elapsed=41539.723729372025	used-gpu-memory=4291
129	converged=False	diverged=False	epoch=132	gradient-norm=0.5865394962390356	learning-rate=0.0007	perplexity-train=4.12434493718335	perplexity-val=7.256415805867125	time-elapsed=41861.65079712868	used-gpu-memory=4291
130	converged=False	diverged=False	epoch=133	gradient-norm=0.4972989076779041	learning-rate=0.0007	perplexity-train=4.123159906491967	perplexity-val=7.29650576191766	time-elapsed=42186.85779643059	used-gpu-memory=4291
131	converged=False	diverged=False	epoch=134	gradient-norm=0.6328834660300717	learning-rate=0.0007	perplexity-train=4.117901118682216	perplexity-val=7.28330866738779	time-elapsed=42510.55305624008	used-gpu-memory=4291
132	converged=False	diverged=False	epoch=135	gradient-norm=0.5799301088828098	learning-rate=0.00049	perplexity-train=4.047220109086317	perplexity-val=7.189300216835388	time-elapsed=42837.826151371	used-gpu-memory=4291
133	converged=False	diverged=False	epoch=136	gradient-norm=0.5053076987010919	learning-rate=0.00049	perplexity-train=4.050847414163335	perplexity-val=7.2154600772393485	time-elapsed=43162.989392995834	used-gpu-memory=4291
134	converged=False	diverged=False	epoch=137	gradient-norm=0.5217552898597285	learning-rate=0.00049	perplexity-train=4.025573114348861	perplexity-val=7.17392426222603	time-elapsed=43485.8529586792	used-gpu-memory=4291
135	converged=False	diverged=False	epoch=138	gradient-norm=0.6449426588175323	learning-rate=0.00049	perplexity-train=4.026566054641535	perplexity-val=7.257399607980661	time-elapsed=43810.08609676361	used-gpu-memory=4291
136	converged=False	diverged=False	epoch=139	gradient-norm=0.4920545317419208	learning-rate=0.00049	perplexity-train=4.024778559836662	perplexity-val=7.184514790782554	time-elapsed=44134.99518895149	used-gpu-memory=4291
137	converged=False	diverged=False	epoch=140	gradient-norm=0.5041366022918715	learning-rate=0.00049	perplexity-train=4.016875654127556	perplexity-val=7.204389777097158	time-elapsed=44459.89624929428	used-gpu-memory=4291
138	converged=False	diverged=False	epoch=141	gradient-norm=0.6394115614726117	learning-rate=0.00049	perplexity-train=4.0203620006643686	perplexity-val=7.216236553531978	time-elapsed=44784.40016198158	used-gpu-memory=4291
139	converged=False	diverged=False	epoch=142	gradient-norm=0.518445553209166	learning-rate=0.00049	perplexity-train=4.009150059557786	perplexity-val=7.233772283666632	time-elapsed=45108.836429834366	used-gpu-memory=4291
140	converged=False	diverged=False	epoch=143	gradient-norm=0.4871076295664065	learning-rate=0.00049	perplexity-train=4.015589155071271	perplexity-val=7.175600269326983	time-elapsed=45434.7427880764	used-gpu-memory=4291
141	converged=False	diverged=False	epoch=144	gradient-norm=0.5076448418062609	learning-rate=0.00049	perplexity-train=4.004265624194394	perplexity-val=7.216121958306906	time-elapsed=45759.52992320061	used-gpu-memory=4291
142	converged=False	diverged=False	epoch=145	gradient-norm=0.5297198634304892	learning-rate=0.00049	perplexity-train=4.006382302048329	perplexity-val=7.167207312168034	time-elapsed=46084.768949985504	used-gpu-memory=4291
143	converged=False	diverged=False	epoch=146	gradient-norm=0.5580551492508756	learning-rate=0.00049	perplexity-train=3.999330161629346	perplexity-val=7.171808485904473	time-elapsed=46410.40136671066	used-gpu-memory=4291
144	converged=False	diverged=False	epoch=147	gradient-norm=0.516358510887172	learning-rate=0.00049	perplexity-train=4.000364348765646	perplexity-val=7.173750092387385	time-elapsed=46734.80087971687	used-gpu-memory=4291
145	converged=False	diverged=False	epoch=148	gradient-norm=0.5856277745871707	learning-rate=0.00049	perplexity-train=3.9936385536355314	perplexity-val=7.192819302546133	time-elapsed=47058.81733131409	used-gpu-memory=4291
146	converged=False	diverged=False	epoch=149	gradient-norm=0.6411602744636814	learning-rate=0.00049	perplexity-train=3.998127764757173	perplexity-val=7.245636261146948	time-elapsed=47383.77396893501	used-gpu-memory=4291
147	converged=False	diverged=False	epoch=150	gradient-norm=0.5453245622387584	learning-rate=0.00049	perplexity-train=3.9971761771593153	perplexity-val=7.167034507902415	time-elapsed=47707.77849507332	used-gpu-memory=4291
148	converged=False	diverged=False	epoch=151	gradient-norm=0.5220935157030957	learning-rate=0.00049	perplexity-train=3.990786763418621	perplexity-val=7.218590150244437	time-elapsed=48032.60163259506	used-gpu-memory=4291
149	converged=False	diverged=False	epoch=152	gradient-norm=0.5861012026395885	learning-rate=0.00049	perplexity-train=3.9854717249597864	perplexity-val=7.166066855010378	time-elapsed=48362.645203351974	used-gpu-memory=4291
150	converged=False	diverged=False	epoch=153	gradient-norm=0.5127334834092464	learning-rate=0.00049	perplexity-train=3.990359921427472	perplexity-val=7.202006649468293	time-elapsed=48687.341084718704	used-gpu-memory=4291
151	converged=False	diverged=False	epoch=154	gradient-norm=0.5763734603040611	learning-rate=0.00049	perplexity-train=3.983640518128855	perplexity-val=7.175323955302499	time-elapsed=49011.97187399864	used-gpu-memory=4291
152	converged=False	diverged=False	epoch=155	gradient-norm=0.5204770249132338	learning-rate=0.00049	perplexity-train=3.984819926176431	perplexity-val=7.169508960415386	time-elapsed=49337.32584643364	used-gpu-memory=4291
153	converged=False	diverged=False	epoch=156	gradient-norm=0.5121550341925866	learning-rate=0.00049	perplexity-train=3.982516963211103	perplexity-val=7.215576036195793	time-elapsed=49661.05023932457	used-gpu-memory=4291
154	converged=False	diverged=False	epoch=157	gradient-norm=0.48457669555421873	learning-rate=0.00049	perplexity-train=3.983915859139942	perplexity-val=7.1759847904272975	time-elapsed=49985.48648524284	used-gpu-memory=4291
155	converged=False	diverged=False	epoch=158	gradient-norm=0.573886202522681	learning-rate=0.00049	perplexity-train=3.977881659716043	perplexity-val=7.25277262853581	time-elapsed=50310.27194619179	used-gpu-memory=4291
156	converged=False	diverged=False	epoch=159	gradient-norm=0.5008936485655643	learning-rate=0.00049	perplexity-train=3.976476352726522	perplexity-val=7.2118257218554955	time-elapsed=50635.137607097626	used-gpu-memory=4291
157	converged=False	diverged=False	epoch=160	gradient-norm=0.5153118177607566	learning-rate=0.00049	perplexity-train=3.971121855154199	perplexity-val=7.156152242984126	time-elapsed=50959.26517558098	used-gpu-memory=4291
158	converged=False	diverged=False	epoch=161	gradient-norm=0.552759114221306	learning-rate=0.00049	perplexity-train=3.9750825125699087	perplexity-val=7.159318953100969	time-elapsed=51283.58176803589	used-gpu-memory=4291
159	converged=False	diverged=False	epoch=162	gradient-norm=0.4997205053269006	learning-rate=0.00049	perplexity-train=3.971391599557271	perplexity-val=7.1802877098197895	time-elapsed=51607.972071409225	used-gpu-memory=4291
160	converged=False	diverged=False	epoch=163	gradient-norm=0.5412240103656677	learning-rate=0.00049	perplexity-train=3.9694147779770113	perplexity-val=7.138456426560147	time-elapsed=51933.42686295509	used-gpu-memory=4291
161	converged=False	diverged=False	epoch=164	gradient-norm=0.6131376753226953	learning-rate=0.00049	perplexity-train=3.9646660201800237	perplexity-val=7.148317056265286	time-elapsed=52256.39479470253	used-gpu-memory=4291
162	converged=False	diverged=False	epoch=165	gradient-norm=0.4897188846431422	learning-rate=0.00049	perplexity-train=3.9669842256652865	perplexity-val=7.125734381902457	time-elapsed=52581.24398565292	used-gpu-memory=4291
163	converged=False	diverged=False	epoch=167	gradient-norm=0.5699980993982907	learning-rate=0.00049	perplexity-train=3.961593772687657	perplexity-val=7.2139203621656325	time-elapsed=52908.31242966652	used-gpu-memory=4291
164	converged=False	diverged=False	epoch=168	gradient-norm=0.5421479097757101	learning-rate=0.00049	perplexity-train=3.960992449502295	perplexity-val=7.172149743051949	time-elapsed=53233.731462955475	used-gpu-memory=4291
165	converged=False	diverged=False	epoch=169	gradient-norm=0.652568723767451	learning-rate=0.00049	perplexity-train=3.95395554992961	perplexity-val=7.1735973312881525	time-elapsed=53557.06556606293	used-gpu-memory=4291
166	converged=False	diverged=False	epoch=170	gradient-norm=0.5622365049616717	learning-rate=0.00049	perplexity-train=3.9556978779758847	perplexity-val=7.203514390166316	time-elapsed=53879.03664803505	used-gpu-memory=4291
167	converged=False	diverged=False	epoch=171	gradient-norm=0.5818334338321643	learning-rate=0.00049	perplexity-train=3.9589460474878915	perplexity-val=7.1396246709244	time-elapsed=54203.25194263458	used-gpu-memory=4291
168	converged=False	diverged=False	epoch=172	gradient-norm=0.5200963880889677	learning-rate=0.00049	perplexity-train=3.95149816663008	perplexity-val=7.156728783444352	time-elapsed=54526.753208875656	used-gpu-memory=4291
169	converged=False	diverged=False	epoch=173	gradient-norm=0.5430545040449185	learning-rate=0.00049	perplexity-train=3.9487507798388695	perplexity-val=7.191466253941434	time-elapsed=54850.106244802475	used-gpu-memory=4291
170	converged=False	diverged=False	epoch=174	gradient-norm=0.765216130865023	learning-rate=0.00049	perplexity-train=3.9496050514536285	perplexity-val=7.198207440449064	time-elapsed=55174.8343937397	used-gpu-memory=4291
171	converged=False	diverged=False	epoch=175	gradient-norm=0.5543184539526425	learning-rate=0.000343	perplexity-train=3.89664568606217	perplexity-val=7.149521571828888	time-elapsed=55499.98434281349	used-gpu-memory=4291
172	converged=False	diverged=False	epoch=176	gradient-norm=0.5333172780191346	learning-rate=0.000343	perplexity-train=3.904670526529005	perplexity-val=7.190243683792062	time-elapsed=55825.33767271042	used-gpu-memory=4291
173	converged=False	diverged=False	epoch=177	gradient-norm=0.5876639108727117	learning-rate=0.000343	perplexity-train=3.8893225833815985	perplexity-val=7.146005814578726	time-elapsed=56148.66529893875	used-gpu-memory=4291
174	converged=False	diverged=False	epoch=178	gradient-norm=0.500518537557067	learning-rate=0.000343	perplexity-train=3.8890117545365785	perplexity-val=7.12970652050695	time-elapsed=56472.07284641266	used-gpu-memory=4291
175	converged=False	diverged=False	epoch=179	gradient-norm=0.4962624387956325	learning-rate=0.000343	perplexity-train=3.8854645147768205	perplexity-val=7.132313883831622	time-elapsed=56796.42426109314	used-gpu-memory=4291
176	converged=False	diverged=False	epoch=180	gradient-norm=0.6405425856255578	learning-rate=0.000343	perplexity-train=3.8878027288179817	perplexity-val=7.169334507647793	time-elapsed=57120.21177458763	used-gpu-memory=4291
177	converged=False	diverged=False	epoch=181	gradient-norm=0.48815727234765877	learning-rate=0.000343	perplexity-train=3.880251141129206	perplexity-val=7.131988978530988	time-elapsed=57443.57640171051	used-gpu-memory=4291
178	converged=True	diverged=False	epoch=182	gradient-norm=0.6422610873006558	learning-rate=0.000343	perplexity-train=3.877833973921821	perplexity-val=7.151012163584752	time-elapsed=57768.650903463364	used-gpu-memory=4291
