import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# 로이터 뉴스 데이터 임포트
from tensorflow.keras.datasets import reuters
# Load the Reuters newswire dataset; `data` holds the (train, test) pair,
# each of which is an (articles, labels) pair.
data = reuters.load_data()
X_train, y_train = data[0]
X_test, y_test = data[1]
# Show the size of every split (the training split holds 8,982 articles).
print(X_train.shape, y_train.shape, X_test.shape, y_test.shape, sep="\n")
(8982,)
(8982,)
(2246,)
(2246,)
# Each article is stored as a sequence of integers: every word of the real
# news text has been replaced by a number.
# The Reuters dataset ships already integer ("label") encoded.
# The integers were assigned by frequency rank during that encoding
# (1 = the most frequent word; the larger the number, the rarer the word).
# NOTE(review): the sequence below starts with 1, which may be a reserved
# start-of-sequence marker rather than a word — confirm against the Keras
# load_data() defaults (start_char / index_from).
X_train[0]
# -> With a large vocabulary, one-hot encoding marks everything except a
# single position as 0, which is very inefficient in terms of storage.
# -> In text / NLP work, integer-encoding the input data this way lets the
# encoding also reflect how frequent each word is.
[1,
27595,
28842,
8,
43,
10,
447,
5,
25,
207,
270,
5,
3095,
111,
16,
369,
186,
90,
67,
7,
89,
5,
19,
102,
6,
19,
124,
15,
90,
67,
84,
22,
482,
26,
7,
48,
4,
49,
8,
864,
39,
209,
154,
6,
151,
6,
83,
11,
15,
22,
155,
11,
15,
7,
48,
9,
4579,
1005,
504,
6,
258,
6,
272,
11,
15,
22,
134,
44,
11,
15,
16,
8,
197,
1245,
90,
67,
52,
29,
209,
30,
32,
132,
6,
109,
15,
17,
12]
# The first news article consists of 87 encoded words
len(X_train[0])
87
# Fetch the word index to see which integer each word of the Reuters
# articles is encoded as (a dict mapping word -> integer)
news_words = reuters.get_word_index()
# - Sort the dictionary entries by word frequency
# A dict is looked up by key rather than by position, so to work with the
# values as well we call items().
# items(): exposes the dict's key/value pairs as (key, value) tuples in a
# dict_items object.
news_words.items()
# Turning a dict into a list by hand is cumbersome; the dict_items form avoids
# that and can be used directly for sorting or in loops, which is why it is
# the form most commonly used.
# - Join the words back together to see which words make up an article
# Invert the word index. news_words maps word -> integer code; decoding an
# article needs the opposite direction (integer code -> word), so build that
# reverse lookup here. Looking words up by their encoded number makes it easy
# to pull the text back out and join it into sentences.
# NOTE(review): reuters.load_data() by default shifts the codes stored in the
# articles (index_from) and reserves the lowest values for markers, so article
# codes may need adjusting before being looked up here — confirm against the
# Keras docs.
word_of_news = {code: word for word, code in news_words.items()}
print(word_of_news)
print('key값이 1번인 단어 :', word_of_news[1])
{10996: 'mdbl', 16260: 'fawc', 12089: 'degussa', 8803: 'woods', 13796: 'hanging', 20672: 'localized', 20673: 'sation', 20675: 'chanthaburi', 10997: 'refunding', 8804: 'hermann', 20676: 'passsengers', 20677: 'stipulate', 8352: 'heublein', 20713: 'screaming', 16261: 'tcby', 185: 'four', 1642: 'grains', 20680: 'broiler', 12090: 'wooden', 1220: 'wednesday', 13797: 'highveld', 7593: 'duffour', 20681: '0053', 3914: 'elections', 2563: '270', 3551: '271', 5113: '272', 3552: '273', 3400: '274', 7975: 'rudman', 3401: '276', 3478: '277', 3632: '278', 4309: '279', 9381: 'dormancy', 7247: 'errors', 3086: 'deferred', 20683: 'sptnd', 8805: 'cooking', 20684: 'stratabit', 16262: 'designing', 20685: 'metalurgicos', 13798: 'databank', 20686: '300er', 20687: 'shocks', 7972: 'nawg', 20688: 'tnta', 20689: 'perforations', 2891: 'affiliates', 20690: '27p', 16263: 'ching', 595: 'china', 16264: 'wagyu', 3189: 'affiliated', 16265: 'chino', 16266: 'chinh', 20692: 'slickline', 13799: 'doldrums', 12092: 'kids', 3028: 'climbed', 6693: 'controversy', 20693: 'kidd', 12093: 'spotty', 12639: 'rebel', 9382: 'millimetres', 4007: 'golden', 5689: 'projection', 12094: 'stern', 7903: "hudson's", 10066: 'dna', 20695: 'dnc', 20696: 'hodler', 2394: 'lme', 20697: 'insolvancy', 13800: 'music', 1984: 'therefore', 10998: 'dns', 6959: 'distortions', 13801: 'thassos', 20698: 'populations', 8806: 'meteorologist', 43: 'loss', 9383: 'exco', 20813: 'adventist', 16267: 'murchison', 10999: 'locked', 13802: 'kampala', 20699: 'arndt', 1267: 'nakasone', 20700: 'steinweg', 3633: "india's", 3029: 'wang', 10067: 'wane', 13803: 'unjust', 13804: 'titanium', 850: 'want', 20701: 'pinto', 16268: "institutes'", 7973: 'absolute', 4677: 'travel', 6422: 'cutback', 16269: 'nazmi', 1858: 'modest', 16270: 'shopwell', 20702: 'sedi', 20703: 'adoped', 16271: 'tulis', 20704: '18th', 20705: "wmc's", 20706: 'menlo', 11000: 'reiners', 12095: 'farmlands', 20707: 'nonsensical', 20708: 'elisra', 2461: 'welcomed', 20709: 'peup', 16272: "holiday's", 
20711: 'activating', 16273: 'avondale', 16274: 'interational', 20712: 'welcomes', 16275: 'fip', 11001: 'tailings', 4205: 'fit', 16276: 'lifeline', 1916: 'bringing', 4819: 'fix', 6164: '624', 12096: 'naturalite', 6165: 'wales', 8807: 'fin', 11129: 'fio', 20714: 'ceremenony', 20715: 'sovr', 20716: "yeo's", 1788: 'effects', 13805: 'sixteen', 8808: 'undeveloped', 13806: 'glutted', 20717: 'barton', 20718: 'froday', 10089: 'arrow', 11002: 'stabilises', 6960: 'allan', 20719: '374p', 3891: '393', 4008: '392', 4206: '391', 3079: '390', 4550: '397', 6166: '396', 6423: '395', 4207: '394', 6961: '399', 4208: '398', 7595: 'stabilised', 5114: 'smelters', 20720: 'oprah', 20721: 'orginially', 20722: "tvx's", 16278: 'ponomarev', 20723: 'enviroment', 20724: "reeves'", 8363: 'mason', 1670: 'encourage', 7596: 'adapt', 12776: 'abbott', 13808: 'stamping', 20726: 'colquiri', 11003: 'ambrit', 8353: 'strata', 4821: 'corrects', 11922: 'sandra', 859: 'estimate', 20727: 'universally', 20728: 'chlorine', 16279: 'competes', 10068: 'leiner', 8809: 'ministries', 8810: 'disturbed', 13809: 'competed', 8811: 'juergen', 13810: 'kfw', 11004: 'turben', 9384: 'reintroduced', 20729: 'maladies', 4101: 'chevron', 16280: 'lazere', 8812: 'antilles', 11907: 'dti', 9070: 'specially', 4678: 'bilzerian', 13811: 'bakelite', 20730: 'renovated', 568: 'service', 16281: 'payless', 20731: 'spiegler', 831: 'needed', 16282: 'wigglesworth', 6962: 'master', 13812: 'antonson', 20732: 'genesis', 13813: 'vismara', 20734: 'organically', 20735: "accords'", 5940: 'task', 7974: 'positively', 3479: 'feasibility', 6963: 'ahmed', 13814: "suralco's", 20736: 'awacs', 16283: 'idly', 20737: 'regulator', 12097: 'pseudorabies', 16284: 'staubli', 8813: 'nzi', 5115: 'feeling', 3127: '275', 20738: '6819', 16285: 'gorman', 8354: 'sustaining', 9385: 'spectrum', 20739: 'consenting', 12098: 'recapitalized', 11562: 'sailed', 7597: 'dozen', 1985: 'affairs', 2253: 'courier', 8355: 'kremlin', 895: 'shipments', 16286: "aquino's", 10070: 
'committing', 5293: 'sugarcane', 9386: 'diminishing', 16287: 'vexing', 11005: 'simplify', 6167: 'mouth', 7248: 'steinhardt', 8814: 'conceded', 9387: 'bradford', 7976: 'singer', 20740: '5602', 13816: "1987's", 4950: 'tech', 6424: 'teck', 20741: 'majv', 666: 'saying', 16477: 'dickey', 20742: 'sweetner', 21149: 'teresa', 20743: 'ulcer', 13817: 'cheaply', 2361: 'thai', 6964: 'orleans', 16290: 'excavator', 6168: 'rico', 12099: 'lube', 13818: 'rick', 4679: 'rich', 13819: 'kerna', 950: 'rice', 4209: 'rica', 5503: 'plate', 16291: 'platt', 8356: 'altogether', 8815: 'jaguar', 20744: 'dynair', 8816: 'patch', 2892: 'ldp', 13820: 'boarded', 16292: 'precluding', 11006: 'clarified', 16293: 'sensitivity', 1511: 'alternative', 11007: 'clarifies', 5116: 'lots', 7598: 'irs', 20745: 'irv', 13821: 'iri', 13822: 'ira', 5690: 'timber', 20746: 'ire', 5219: 'discipline', 1937: 'extend', 3634: 'nature', 16295: "amb's", 16296: 'dunhill', 2142: 'extent', 20747: 'restrcitions', 2396: 'heating', 11008: "mannesmann's", 20748: 'outsanding', 20749: 'multimillions', 13824: 'sarcinelli', 6694: 'southeastern', 10071: 'eradicate', 9388: 'libyan', 20750: 'foreclosing', 12101: 'maclaine', 20751: 'fra', 353: 'union', 11009: 'frn', 386: 'much', 12102: 'fry', 20752: 'mothball', 10072: 'chlorazepate', 12103: 'dxns', 19981: 'toyko', 20753: 'spit', 16297: '007050', 16298: 'freehold', 13825: 'davy', 11010: 'dave', 12177: 'spie', 10117: 'aguayo', 12104: 'wildcat', 10069: 'fecs', 20754: 'kennan', 16299: 'intal', 9389: 'contingencies', 16551: 'professionally', 16300: 'microbiological', 20756: 'misconstrued', 409: 'k', 20757: 'securitiesd', 16301: 'deferring', 5941: 'kohl', 3030: 'conditioned', 20758: 'fnhb', 16302: "october's", 13954: 'memorial', 6965: 'democracies', 27520: 'conformed', 464: 'split', 12105: "bond's", 11112: 'thinly', 16515: 'dunkirk', 16303: 'cavanaugh', 13827: "securities'", 21345: 'marches', 16304: 'issam', 2020: 'workforce', 12106: 'meinert', 13828: 'boiler', 5294: "bp's", 16305: 'torpedoed', 
20762: 'indidate', 13829: 'downwardly', 20763: 'viviez', 20764: 'vladiminovich', 16306: 'academic', 20765: 'architecural', 1117: 'corporate', 16307: 'appropriately', 20766: 'teicc', 20767: "hanover's", 8817: 'aristech', 20768: 'portrayed', 21383: 'raffineries', 20770: 'hai', 7599: 'hal', 13830: 'ham', 10073: 'han', 20771: 'e15b', 61: 'had', 20772: 'hay', 13831: 'botchwey', 10074: 'haq', 37: 'has', 13832: 'hat', 20773: 'hav', 20774: 'fortin', 8818: 'municipal', 20775: 'osman', 20776: 'fsical', 3480: 'elders', 12107: 'survival', 16308: 'unequivocally', 2519: 'objective', 6695: 'indicative', 10075: 'shadow', 21411: 'riskiness', 20778: 'positiive', 10076: "american's", 16309: 'alick', 16310: 'harima', 12108: 'alice', 20779: 'altschul', 16311: 'festivities', 20780: 'medecines', 2942: 'beneficial', 12109: 'yoweri', 13833: 'crowd', 9390: 'crowe', 3553: 'crown', 13679: 'topping', 8819: 'captive', 12110: 'billboard', 6169: 'fiduciary', 3402: 'bottom', 20782: 'plucked', 20783: 'locksmithing', 9391: 'ecopetrol', 24018: 'pipestone', 5505: "growers'", 20785: 'borrows', 16312: 'eduard', 13834: 'venpres', 16313: 'bamboo', 13835: 'foolish', 20786: 'uruguyan', 20787: 'officeholders', 20788: 'economiques', 16314: 'aden', 4822: 'maxwell', 4680: 'marshall', 16315: 'honeymoon', 16316: 'administer', 20790: 'shoots', 16317: 'rubbertech', 16318: 'johsen', 10077: 'reciprocity', 13836: 'fabric', 20791: 'suffice', 20792: 'spokemsan', 20793: "sonora's", 16319: '5865', 16320: "systems'", 20794: 'perfumes', 20795: 'halycon', 20796: 'nonvoting', 7250: 'safeguard', 21538: 'sawdust', 20797: "else's", 13837: 'arrays', 20798: 'aza', 20799: 'smasher', 12111: 'complications', 1813: 'pesos', 20800: 'relabelling', 3722: 'passenger', 12112: "avon's", 20801: 'megahertz', 10683: 'mirror', 8357: 'minas', 16322: 'bourdain', 20802: 'crownx', 6425: 'eventual', 1207: 'crowns', 1369: 'role', 20803: 'obliges', 16323: 'rolf', 13838: 'vegetative', 20804: 'rolm', 4419: 'roll', 2463: 'intend', 16324: 'palms', 19255: 
'denys', 13839: 'transported', 20805: 'moresby', 16325: 'devon', 1351: 'intent', 20806: "camco's", 5942: 'variable', 20807: 'transporter', 16326: 'danske', 13840: 'friedhelm', 8358: 'hawker', 17774: "sand's", 20808: 'preseving', 12113: '80386', 16328: 'bnls', 19984: 'ordination', 11011: 'overturned', 16329: 'erred', 6696: 'cincinnati', 16710: 'corps', 20809: 'whoever', 16330: 'osp', 13841: 'osr', 12114: 'ost', 16331: 'chair', 5647: '690', 20810: 'grapples', 13842: 'megawatts', 20811: 'photocopiers', 20812: 'sconninx', 2274: 'circumstances', 13843: 'oversight', 20814: "paradyne's", 6363: '691', 20815: 'paychecks', 13844: "stadelmann's", 3241: 'choice', 11012: 'vastagh', 8820: 'embark', 9392: 'gloomy', 9393: 'stays', 4009: 'exact', 5117: 'minute', 11892: 'kittiwake', 20816: 'picul', 20817: 'skewed', 11013: 'cooke', 10078: 'defaults', 11014: 'reimpose', 9394: 'hindered', 20818: 'lengthened', 16333: 'chopping', 13845: 'mckiernan', 20819: 'collaspe', 7251: 'corazon', 7600: 'antwerp', 13846: 'abdullah', 13847: 'goldston', 442: '300', 20821: 'cassa', 20822: 'casse', 4081: '695', 2979: 'ground', 839: 'boost', 16334: 'azusa', 9395: 'drafted', 4823: '303', 13848: 'climbs', 7601: 'honour', 20823: 'vanderbilt', 3968: '305', 3031: 'address', 8821: 'dwindling', 7252: 'benson', 12115: 'enroll', 501: 'revenues', 12116: 'impacted', 20826: 'queue', 10079: 'accomplished', 7602: 'throughput', 9396: 'influx', 10080: 'stockbuilding', 20827: 'aproximates', 13849: 'petroleo', 16335: 'sistemas', 14053: 'feretti', 5943: 'opposes', 882: 'working', 20829: 'perished', 13850: 'oldham', 20830: '27000', 19245: 'optimize', 20832: 'vigour', 1580: 'opposed', 16336: 'liberalizing', 20833: 'wvz', 20834: 'dampness', 13851: 'approving', 13496: 'sierra', 20835: 'entrepot', 224: 'currency', 1499: 'originally', 20837: 'tindemans', 16337: 'valorem', 477: 'following', 20838: 'fossen', 11016: 'locke', 20839: 'employess', 12117: 'rotberg', 16338: 'parachute', 11017: 'locks', 12255: 'incremental', 16339: 
'woolowrth', 20841: 'listens', 7253: 'litre', 3554: 'edouard', 1377: 'ounce', 20843: 'nicanor', 20844: 'sucocitrico', 16340: 'minicomputers', 16341: "silva's", 11018: 'restitutions', 16342: 'custer', 2590: '3rd', 10081: 'fueled', 20845: 'trydahl', 11019: 'aice', 12118: 'harmon', 10082: 'conscious', 20846: 'herbicidesand', 20847: 'subdivisions', 20848: "veslefrikk's", 11020: 'swollen', 7978: 'pulled', 20849: 'tilney', 203: 'years', 20850: 'structuring', 20851: 'episodes', 16343: 'sportscene', 16344: "northair's", 20852: 'jig', 20853: 'jin', 3403: 'jim', 8359: 'troubles', 13852: 'workforces', 2362: 'suspension', 3892: 'troubled', 16345: 'fondiaria', 6697: 'modestly', 12119: 'recipients', 7979: 'civilian', 13853: 'indigenous', 20854: 'overpowering', 1051: 'drilling', 16346: 'sorted', 16347: 'lichtenstein', 20855: 'bedevil', 20856: 'dispite', 16843: 'battleships', 4824: 'instability', 95: 'quarter', 20857: 'salado', 5692: 'honduras', 13855: "chevron's", 12273: "lazere's", 2660: 'receipt', 8360: 'sponsor', 4825: 'entering', 16349: "kcbt's", 19987: 'nowicki', 13856: 'salads', 16351: 'augar', 7980: '797', 7254: '796', 8361: '795', 5295: '794', 5118: '793', 6170: '792', 5296: '791', 4826: '790', 20858: "nikko's", 20859: 'unsaleable', 5720: '799', 5693: '798', 2143: 'seriously', 16352: 'trauma', 20860: 'tvbh', 20861: 'macedon', 21906: 'disintegrated', 21909: 'adddition', 2244: 'incentives', 5944: 'complicated', 20864: 'reevaluating', 21921: 'thatching', 7981: 'brasil', 20865: '79p', 4951: 'wrong', 8822: 'initiate', 16353: 'aboard', 7255: 'saving', 8823: 'spoken', 16364: 'parkinson', 65: 'one', 20867: 'ont', 7256: 'concert', 16354: "boston's", 13859: 'stifled', 4622: 'types', 20868: 'lingering', 16356: 'surges', 20869: 'hurdman', 16357: 'herds', 14114: 'absorbs', 4681: 'surged', 14211: 'dalkon', 13860: 'crossroads', 20870: 'shakeup', 20871: 'disasterous', 11021: 'illness', 3242: 'turned', 3801: 'locations', 12120: 'tyranite', 13861: 'minesweepers', 7257: 'turner', 20872: 
'borough', 12358: 'underlines', 20873: "bancorporation's", 20874: 'fashionable', 20875: "ae's", 16358: 'dilutions', 9472: 'goodman', 10510: 'unlawfully', 16359: 'mayer', 16360: 'printer', 20877: 'offload', 13862: 'opposite', 738: 'buffer', 9398: 'printed', 16361: 'pequiven', 13863: 'panoche', 20878: 'knowingly', 16362: 'ecusta', 20879: 'thsl', 8825: 'phil', 13864: 'jitters', 16363: 'touche', 20881: 'jittery', 3291: 'friction', 16365: 'fecal', 22068: 'resurgance', 20882: 'heeding', 2363: 'soviets', 16366: 'imagined', 16367: 'transact', 20883: 'califoirnia', 9399: "chrysler's", 16368: 'respecitvely', 16369: 'presse', 10084: 'euromarket', 12121: 'guarded', 16371: 'satisfacotry', 20884: 'authroization', 20885: 'simplistic', 20886: 'monde', 4102: 'awaiting', 13865: 'recombinant', 20887: 'refinancement', 20888: 'comserv', 20889: 'kitakyushu', 16372: 'pima', 11022: 'basle', 20891: '6250', 16373: 'choudhury', 8826: 'vision', 20892: 'interruptible', 13866: 'weatherford', 7982: '832', 5694: '833', 4420: '830', 5119: '831', 5297: '836', 4553: '837', 6172: '834', 4952: '835', 22144: 'alarming', 5695: '838', 6173: '839', 20893: '524p', 20894: 'sponsorship', 12122: 'vendex', 20895: "amsouth's", 20896: 'kilometer', 10086: 'enjoys', 20897: 'illiberal', 6174: 'punta', 20898: 'punte', 10087: 'girozentrale', 20899: 'missstatements', 10088: 'marietta', 6175: 'awards', 3635: 'concentrated', 20900: '83p', 13867: 'developpement', 13868: 'rhodes', 5696: 'matheson', 20901: '1720', 20902: 'paring', 35: 's', 4953: 'concentrates', 16374: "can's", 22183: 'polysaturated', 20903: 'parini', 13869: 'baden', 20904: 'bader', 12123: 'buoyancy', 20905: 'erdem', 16375: 'properites', 20906: 'comparitive', 12124: 'practises', 20907: 'collides', 189: 'west', 20908: 'wess', 13870: 'collided', 20909: 'practised', 20910: "amalgamated's", 20911: 'motives', 1378: 'wants', 1273: 'formed', 20912: 'readings', 12125: 'geothermal', 7315: 'tightened', 11023: "d'or", 1109: 'former', 20913: 'venezulean', 19935: 
'curd', 12126: 'squeezes', 1019: 'newspaper', 817: 'situation', 13871: 'ivey', 3636: 'engaged', 13872: 'dubious', 17061: 'cayacq', 20916: 'cobol', 20917: 'limping', 883: 'technology', 20919: 'koerner', 16376: 'debilitating', 7983: 'verified', 4010: 'otto', 20920: '7770', 16377: 'emulsions', 16378: "onic's", 9075: 'slate', 20921: 'wires', 5506: 'edged', 20922: 'assigns', 1341: 'singapore', 20923: 'deflate', 20924: "strategy's", 16379: 'walesa', 4554: 'advertisement', 20925: 'luyten', 20926: 'shrortly', 20927: 'corpoartion', 22290: 'preferance', 16380: 'tracking', 13874: 'sunnyvale', 20928: 'colorants', 16381: 'persistently', 16382: "officers'", 20929: "his's", 367: 'being', 7259: 'divestitures', 20930: 'steamer', 20931: 'rover', 8362: 'grounded', 16383: "businessmen's", 16384: 'cyanidation', 20932: 'overthrow', 20933: 'partnerhip', 16385: 'sumt', 8827: 'sums', 16386: 'oelmuehle', 16387: 'unveil', 13875: 'gestures', 20934: 'penta', 2544: 'traffic', 2428: 'preference', 20935: 'sumi', 166: 'world', 9400: 'postal', 16388: 'bced', 12128: 'dornbush', 14215: 'confine', 20936: '2555', 5945: "zambia's", 20937: 'superiority', 20938: 'militate', 2395: 'satisfactory', 20939: 'superintendent', 5946: 'tvx', 16389: 'tvt', 6698: 'magma', 20940: 'diving', 15548: 'tvb', 13876: 'seaman', 11025: 'matsunaga', 4827: '919', 5298: '918', 17070: 'refundable', 5947: '914', 7260: '917', 6699: '916', 5507: '911', 4828: '910', 10213: 'restoring', 4555: '912', 20942: 'squabble', 7261: 'retains', 20943: "partner's", 5300: 'leadership', 11026: 'graaf', 20944: 'spacelab', 1800: 'thailand', 9402: 'graan', 20945: 'exasperating', 12129: 'hartmarx', 16390: 'frights', 20946: 'niall', 11027: 'johnston', 16391: '91p', 16392: 'sensitively', 6016: 'porsche', 15494: 'prepares', 12130: 'lively', 10686: 'stoppages', 16394: "associated's", 12131: 'pivot', 1037: 'series', 24050: 'sese', 7604: 'bubble', 16395: 'trusses', 20949: 'interestate', 20950: 'continents', 20951: 'societal', 28: 'with', 6176: 'pull', 6700: 
'rush', 6222: 'monopoly', 20953: 'operationally', 20954: 'dirty', 10090: 'abuses', 7262: 'prudhoe', 5949: 'pulp', 16396: 'rust', 20955: 'hellman', 20956: 'amdec', 16397: 'australasian', 13878: 'watches', 20957: 'hypertension', 20958: "hemdale's", 16398: 'formulation', 7605: 'watched', 20959: 'jargon', 13879: 'cream', 9404: 'ideally', 11028: 'ryavec', 20960: 'microoganisms', 13880: 'indemnify', 20961: 'wincenty', 20962: 'waving', 20963: "multifood's", 20964: 'midges', 11029: 'natalie', 13881: 'crosbie', 20965: 'posible', 13882: 'omnibus', 20966: 'assetsof', 13883: 'tricks', 16399: 'rs', 20967: 'kilogram', 25363: 'pruning', 13884: 'dyer', 20968: 'dyes', 20969: 'legislatures', 16400: 'scm', 9405: 'sci', 20970: 'riedel', 16401: 'ceramic', 6701: 'unitholders', 13885: 'scb', 20971: 'dn11', 20972: 'conditionality', 13807: "stock's", 20973: 'masland', 7606: 'causes', 10091: 'riots', 20974: 'norf', 9406: 'nord', 3893: 'midwest', 13886: 'tamils', 16402: 'ofthe', 3421: "colombia's", 11030: '24th', 20975: 'sant', 10092: 'moines', 22577: 'electrotechnical', 24534: 'proceeded', 20976: 'sanz', 13887: 'insufficiently', 20977: 'sang', 5950: 'sand', 16404: 'bracho', 805: 'small', 20978: 'workloads', 6702: 'sank', 20979: 'kemper', 16405: 'abbreviated', 13888: 'quicker', 3802: '199', 3243: '198', 2661: '195', 3080: '194', 4310: '197', 3894: '196', 2850: '191', 2199: '190', 3481: '193', 3350: '192', 582: 'past', 20980: 'fractionation', 20981: 'displays', 3081: 'pass', 202: 'investment', 27062: 'quals', 16406: 'quicken', 20983: "centronic's", 20984: 'menswear', 16407: 'clock', 20985: 'teape', 20986: 'teapa', 10093: 'prevailed', 9407: 'hebei', 379: 'full', 22618: '39844', 12133: 'sarasota', 20987: '19p', 20988: 'attendence', 20989: 'kanasa', 9834: 'haruo', 723: 'november', 8828: 'landslide', 3405: 'experience', 252: 'prior', 8364: 'periodic', 20990: 'cessation', 16408: 'skepticism', 3555: "friday's", 1291: 'followed', 7263: 'retroactive', 20991: 'lfio', 16409: 'lincan', 20992: 'silva', 
20993: 'attendance', 20994: '3456', 20995: 'enliven', 13889: 'subsidaries', 13890: 'fragrance', 16410: 'nederlanden', 92: 'more', 3406: 'door', 6703: 'initiated', 16411: 'substances', 46: 'company', 5697: 'corrected', 11109: 'chiari', 6426: 'tested', 13891: 'geipel', 20997: "maxtor's", 6918: 'slifer', 16412: '345p', 20999: 'anxieties', 14299: 'installing', 8829: 'learn', 8365: 'knocked', 21001: 'changeover', 13892: 'scramble', 12135: 'ponta', 16413: 'lesotho', 21002: 'datachecker', 16414: 'recapitalize', 11031: 'bonded', 1593: 'huge', 3803: 'respective', 9045: 'brookehill', 13893: 'speedboat', 16415: 'hugo', 12136: 'hugh', 3804: 'dismissed', 21003: 'dismisses', 16417: 'bretz', 1721: 'intended', 13643: 'brett', 3556: 'lanka', 13894: 'jiang', 21005: 'bruks', 13895: 'resemble', 18535: 'knobbs', 2806: 'replied', 5509: 'installed', 3805: 'resorts', 907: 'paper', 3407: 'scott', 1605: 'signs', 21007: "dynamics'", 21008: 'installer', 21009: 'roots', 21010: "innovex's", 7607: 'revalued', 21011: 'xiaoping', 21012: 'hectolitre', 16418: 'perrodo', 21013: 'bypass', 13896: 'sauce', 21014: 'focussed', 21015: 'abandons', 21016: 'companywide', 12137: 'comstock', 13897: 'coursed', 21017: 'ecra', 21018: 'mandarins', 11243: 'chrobok', 21020: 'denny', 11032: 'courses', 12138: 'piping', 21021: 'cetane', 16419: 'betweenm', 2662: 'repayment', 16420: 'associes', 9408: 'reactions', 21022: 'engendered', 12139: "pc's", 9409: 'gilberto', 16421: 'gfi', 11033: 'advertisers', 1002: 'operation', 21023: 'lipstick', 900: 'research', 13898: 'weightings', 16422: "enzon's", 8366: 'mineworkers', 21024: 'triennial', 7264: 'occurs', 16423: "creek's", 13899: 'jiangsu', 21025: 'paringa', 7984: 'evergo', 21026: 'kertih', 11034: 'abnormally', 24062: 'saltpeter', 21027: 'plowing', 17276: 'pyne', 5510: 'definition', 16425: 'gakki', 21029: 'genecor', 16426: 'imre', 21030: 'nynas', 16427: "spc's", 21031: 'precautionary', 11035: 'preservation', 21032: 'firstg', 6967: 'moderately', 21033: 'dfm', 5511: 
'calculations', 21034: 'aver', 12140: 'metzenbaum', 10094: 'subsidise', 21035: "paul's", 6704: "exxon's", 9410: 'interviewed', 21036: 'advsiors', 16428: 'typhoon', 8830: 'novamin', 11036: 'interviewer', 2275: 'carryforward', 16429: 'intergold', 10095: 'ziering', 16430: 'underpinning', 5301: 'dismantling', 2851: 'emirates', 12141: 'breslube', 10096: 'prescott', 13900: 'brining', 13901: 'nikolai', 16431: 'petrocanada', 21037: 'qustion', 5512: 'organisations', 16432: 'guarding', 21038: 'nikolay', 7019: 'reorganisation', 334: 'sell', 21039: 'selm', 21040: 'permanence', 2565: 'self', 72: 'also', 13902: 'recognizing', 12142: 'selz', 16434: 'pocono', 21041: 'departmental', 21042: 'disdmutase', 13903: 'virus', 21043: 'yunfu', 10097: 'singled', 10098: 'understands', 4421: 'laboratories', 13904: 'omaha', 21044: 'arson', 12143: 'seize', 6177: 'sometimes', 25815: 'advisement', 9411: 'barred', 520: 'barrel', 15874: 'ccb', 6178: 'amusements', 4311: 'bulletin', 9412: 'wording', 27560: 'piper', 16435: 'ugo', 16436: '780p', 21046: 'ugc', 8367: 'celeron', 10099: 'bookings', 21047: 'overwhelmed', 16437: 'occupation', 21048: 'uruc', 11037: 'cassette', 21050: "asturiana's", 16438: 'columns', 5698: 'lombard', 21051: '3m', 21052: 'secular', 16439: 'ceasing', 13905: 'sunny', 7985: 'remedy', 13906: 'compass', 16440: 'montedision', 27563: '3a', 16441: 'shabwa', 21054: 'wrecks', 6705: "utility's", 16442: "southern's", 21055: 'oceaneering', 2511: 'tanker', 6428: 'prospectus', 6706: 'rumored', 21056: 'insane', 3806: '319', 3895: '318', 4384: 'allis', 4011: '312', 4312: '311', 2938: '310', 3896: '317', 3408: '316', 3409: '315', 4556: '314', 12144: 'signups', 8368: 'collectively', 21057: 'shuichi', 27565: 'mcmurray', 21058: "phillips's", 21059: 'hibbert', 21060: 'offier', 12145: 'unrealized', 4954: 'idle', 16443: 'vmnb', 17378: '15th', 21062: 'contstitutes', 16444: "utah's", 6968: '447', 21063: '31p', 11038: 'moengo', 16445: 'canrad', 4777: '445', 21064: 'empowering', 51: 'last', 2939: 'henley', 
16446: 'hardart', 2054: 'connection', 16447: 'algonquin', 21066: 'retarded', 21067: 'lasl', 21068: 'sadat', 16448: 'hiap', 21069: 'belo', 2159: 'bell', 12146: 'belb', 4829: 'acted', 17397: 'belg', 27677: 'optioon', 16449: '8125', 4830: 'belt', 7538: 'lender', 21071: 'imperfections', 8831: 'geoffrey', 21072: 'initiation', 21073: 'sisak', 12147: 'faulkner', 21074: "harcout's", 21075: 'patrolled', 16451: 'frito', 12148: 'nrc', 21076: 'infect', 23171: 'nra', 15876: 'reincorporation', 10278: 'fritz', 543: '38', 13907: 'pacemaker', 16452: 'frits', 12149: 'consulate', 21077: 'presses', 21078: 'kakuei', 2277: 'expanded', 383: 'budget', 16453: 'datagene', 4422: 'pressed', 9200: 'caltex', 16454: 'khashoggi', 16455: 'budged', 21079: 'seams', 21080: 'vof', 21081: '7800', 3410: 'averaging', 5513: 'von', 2146: 'motors', 16456: 'algoma', 7265: 'raiders', 21082: "nikonov's", 21083: "move's", 21085: '2743', 21086: 'harvesters', 16457: 'cayman', 16458: 'nabi', 7609: 'flooded', 9289: 'discontinue', 23233: 'naba', 4853: 'principally', 21089: 'fleischer', 21090: 'sandvik', 21091: "motor'", 4982: 'climb', 21092: 'delisting', 7266: 'recoverable', 21093: "endotronics'", 3723: "bundesbank's", 13908: 'suffolk', 21094: 'thaddeus', 21095: 'hercules', 21096: '3043', 23279: 'spinners', 1298: 'tin', 6179: "o'neill", 13909: 'foxcroft', 21097: 'previosuly', 16460: 'yasushi', 2021: 'indonesian', 11039: "province's", 2096: 'emergency', 4831: 'couple', 11040: 'emanating', 8370: 'emergence', 21098: 'khur', 21099: 'thurman', 3483: 'individuals', 16461: "nova's", 21100: 'explainable', 1296: 'privately', 3244: 'methods', 13910: 'bounce', 21101: 'wiating', 21102: 'ncol', 3724: 'mideast', 21103: 'greener', 8833: 'tisch', 3897: 'redstone', 16463: 'stringfellow', 21104: 'measurements', 21105: 'novelty', 21106: 'behave', 21107: "key's", 21108: 'inserting', 21109: 'trimmmed', 1828: 'wells', 12150: 'jornal', 21110: 'janusz', 21111: 'obscured', 21112: 'montero', 16464: 'malaba', 21113: 'deserved', 12151: 
'dosher', 16465: 'ferrosilicon', 11041: 'brinkmann', 13911: "bahrain's", 21114: 'exeed', 8834: 'melbourne', 21115: 'deserves', 21116: 'impoundments', 12152: 'canning', 16466: 'intf', 11042: 'yergin', 13912: 'intc', 21117: 'terrorists', 169: 'into', 21118: 'intn', 21119: 'inti', 21120: 'nenryo', 21121: 'intw', 16467: 'middleton', 12153: 'contributor', 21122: 'inspects', 3557: "norway's", 5121: 'uncertainties', 10101: 'span', 8835: 'gases', 16468: 'deutschemark', 21123: "chalmers's", 12154: 'spar', 16469: 'redoubled', 5699: 'opens', 11310: 'fragmented', 12155: 'atlantis', 9413: 'medco', 7610: 'hawaiian', 21124: 'lichtbalu', 8371: 'deductions', 2022: 'atlantic', 21125: 'atlantia', 2545: 'considerable', 14492: 'heinrich', 21126: 'fades', 21127: 'swasey', 21128: 'bayvet', 11043: 'intis', 2464: 'maturity', 21129: 'preemptive', 21130: 'faded', 2546: 'retaliatory', 21131: 'sudbury', 21132: 'counterbore', 16471: 'chad', 21133: 'subtracting', 23436: 'deadliest', 13913: 'chao', 11044: 'globally', 11045: 'chas', 16750: 'shaif', 11046: 'diverse', 21134: 'ujungpandang', 15097: "columbia's", 21136: 'goldner', 14499: 'furnas', 17524: 'crompton', 21138: 'decsion', 13914: 'pathmark', 2663: 'revision', 13915: 'kaakebeen', 16472: 'oblige', 16473: 'lufthansa', 16474: 'suppression', 6707: 'reinsurance', 21140: 'slaughtering', 5122: 'lane', 1069: 'land', 17539: 'gunboats', 21141: 'commerice', 9414: 'cwts', 13916: 'snowcover', 21142: 'modernizing', 21143: 'suthee', 11047: 'incorporate', 13917: 'shiratori', 6429: 'broaden', 10102: 'cobalt', 21144: 'levelland', 4866: 'broader', 11048: 'canterra', 21145: '712p', 8372: 'amalgamation', 16476: 'unforseen', 2895: 'turkish', 21146: 'ncng', 16288: "degussa's", 21147: 'amtron', 16478: 'newhall', 7611: "association's", 23510: 'dickinson', 6003: 'indicating', 5952: 'lindner', 12156: 'gerais', 23518: "saunders'", 21150: 'dryland', 21151: 'interagra', 21152: '7124', 23533: '7125', 1986: 'minerals', 21153: 'lombarde', 5953: 'harbor', 713: 'f', 4012: 
'fewer', 16479: 'underutilised', 20613: 'revitalise', 21155: 'leonardo', 4832: '591', 5123: '590', 6708: '593', 6180: '592', 6181: '595', 5303: '594', 5514: '597', 5304: '596', 6430: '599', 4013: '598', 6182: 'unprofitable', 1722: 'video', 3128: 'dynamics', 21156: 'tentonian', 5515: 'victor', 13919: 'chalaby', 12157: 'waning', 16480: 'anounced', 21157: 'midrate', 12158: 'henceforth', 16481: 'aberrational', 7612: 'chalabi', 3898: 'turnaround', 5516: 'flowing', 16482: "simon's", 21158: 'aancor', 21159: 'hatakeyama', 16483: 'fifteen', 21160: 'bakersfield', 21161: 'tdy', 21162: '5571', 11181: 'fetch', 21163: "material's", 1500: 'survey', 16484: 'mawr', 1357: 'makes', 1767: 'maker', 16485: 'panicked', 21164: '3267', 13505: '1934', 12159: 'kidokoro', 23614: 'japex', 1749: 'confidence', 21165: 'blife', 21166: "princeville's", 21167: 'nagoya', 222: 'next', 6970: 'eleven', 23620: 'guldemond', 21168: "iv's", 12161: 'yugoslavian', 12162: 'heiwa', 21169: 'pfd', 21170: 'pamit', 16487: 'undeclared', 21171: 'babk', 23638: 'businessland', 11050: 'baby', 7986: "exporters'", 21172: 'deregulatory', 1594: 'customer', 11051: 'integrating', 21173: "cftc's", 2630: 'clients', 8373: 'benefitting', 21174: 'precipitators', 21175: 'barend', 13920: 'penobscot', 6183: 'wedge', 21176: 'painstakingly', 1149: 'process', 6184: 'lock', 7267: 'promotional', 21177: 'nears', 16489: 'rods', 10103: 'bolstered', 10104: 'educational', 16490: '12ths', 21178: 'petroeleos', 11052: 'hormones', 22405: "trinidad's", 8836: 'intelligent', 16492: 'rosehaugh', 16493: "schuster's", 9712: 'honda', 22342: 'chow', 13921: 'derose', 13922: 'solita', 13923: 'upstate', 3032: 'realized', 8837: 'manpower', 21179: 'realizes', 2003: 'houston', 21180: 'understate', 21181: "ambac's", 21182: 'rwnt', 16495: 'muto', 7321: 'hampshire', 16496: 'volta', 21183: 'coittee', 16497: 'perfect', 16498: 'succesfull', 21184: 'macalpine', 21185: 'lithographing', 7268: 'meantime', 13924: 'stalled', 10105: 'derivative', 21186: 'ezzedine', 21187: 
'sabotaging', 10106: 'physicians', 16499: 'prosper', 13925: 'hainan', 21188: "seaway's", 10107: 'jeumont', 21189: 'snake', 5517: 'realize', 21190: 'morger', 27586: 'lamy', 10108: 'bowater', 4014: 'peking', 1261: 'shortage', 12165: 'quixote', 21191: 'homogeneous', 11053: 'suitors', 16500: 'marketech', 2664: 'deficiency', 2219: 'books', 13926: 'witness', 10109: 'makoto', 7038: 'fundamentally', 6709: "'", 16501: 'creditworthy', 4186: 'your', 16502: 'enskilda', 21192: 'unprepared', 21193: 'adaptive', 11055: 'burke', 21194: 'wdca', 16503: 'hyundai', 9415: 'armacost', 6299: 'adherence', 11056: 'keys', 21196: 'npct', 21197: "operation's", 16504: 'mainland', 4241: 'gallons', 111: 'could', 21199: 'chilly', 6185: 'length', 8839: 'ratification', 21200: 'camaguey', 16506: 'repeats', 21201: 'spectradyne', 16507: 'physik', 13927: 'westar', 16508: 'conant', 13928: 'vickers', 21202: 'protecionist', 21203: 'hydrogenated', 11057: 'scene', 21204: 'broilers', 11058: 'woodhead', 15880: 'cato', 9416: 'vetoed', 7613: 'feedgrain', 16509: 'ordering', 28320: "progressive's", 21205: 'backdoor', 21206: 'tucsonm', 878: 'interests', 4424: 'enforcement', 13929: "qatar's", 9417: 'vigorous', 4425: "sweden's", 21207: "saving's", 21208: 'chona', 6186: "europe's", 21209: 'cmcl', 21210: 'slaine', 21211: 'egregious', 21212: 'cmca', 8432: '949', 16510: 'orchestrated', 5700: 'false', 21214: 'shrinks', 2896: 'tonight', 17086: 'insele', 16511: 'richman', 16512: 'depict', 16513: "armco's", 1335: 'worldwide', 21215: 'manor', 21216: 'furious', 21217: 'nerio', 21218: 'regressing', 13930: 'instil', 3725: 'placement', 21219: 'bred', 21220: 'undersea', 16514: 'brew', 22410: 'horne', 21222: 'dependant', 21223: 'expediture', 5124: "analysts'", 21224: 'oilds', 21225: 'taps', 12166: 'jay', 21226: 'jaw', 16516: 'jar', 2227: 'terminal', 4683: 'entities', 14647: 'bombings', 6431: 'tape', 11059: "searle's", 21227: 'wirebars', 21228: 'undivided', 2940: 'dauster', 21229: 'prohibition', 21230: 'antagonise', 16518: 
'molasses', 21231: 'wring', 10111: 'androsch', 2897: 'comprising', 1292: 'taxes', 21232: 'vying', 16519: 'stuff', 16520: 'aeg', 8374: "herrington's", 2055: 'ohio', 13931: 'guessing', 8840: 'frame', 1917: 'differentials', 11060: 'andover', 849: 'agricultural', 24097: 'postive', 6886: 'disc', 21234: 'destiny', 2119: 'nuclear', 16521: 'melrose', 21235: 'membrane', 16522: 'guayana', 21236: 'farrmers', 16523: 'p234', 3411: 'lawsuit', 21237: 'marty', 21238: 'extendable', 16524: 'perfidious', 16525: 'marts', 13933: 'erodable', 21239: 'keppel', 6432: 'bristol', 4833: 'transamerican', 4955: 'conclude', 21240: 'williamsville', 2566: 'midland', 12170: 'umuarama', 7987: 'kahn', 21241: 'kayaba', 21242: 'mergewr', 7269: 'entitle', 12167: 'valuations', 21243: "ccb's", 21244: 'swindling', 16526: 'allergan', 14674: 'ventra', 16527: 'goverments', 29336: 'sacrifice', 21245: 'cvs', 12168: 'cvn', 21246: 'gamesmanship', 16529: 'eaton', 10112: 'rbi', 16530: 'cvg', 7270: 'rbd', 21248: 'barite', 13823: 'nowruz', 21249: 'ransom', 12169: 'denial', 21250: 'ishrat', 5125: 'paulo', 21251: 'painstaking', 11061: 'gibbs', 13934: 'pires', 21252: "marine's", 9418: "quarter's", 21253: 'unsympathetic', 6710: 'identity', 7614: 'audit', 407: 'off', 21255: 'ofd', 1036: 'indonesia', 21256: 'karani', 6711: 'patterns', 17862: 'pham', 21258: 'administered', 4426: 'diversify', 4314: 'audio', 21259: 'phab', 10113: "vw's", 11062: 'newest', 21260: 'akira', 11063: 'messidor', 21261: 'lavorel', 21262: 'bolivianos', 11064: 'matsushita', 16532: 'jinja', 16533: 'wee', 21263: "asher's", 13935: 'wei', 10114: 'wen', 21264: 'wel', 21265: 'wer', 3900: 'wet', 13936: 'enfield', 21266: 'practise', 16534: 'villagers', 21267: 'normalise', 6188: 'cruz', 4684: 'zimbabwe', 21840: 'gambling', 21268: 'rioghts', 5954: 'pier', 1025: 'you', 21269: "squibb's", 21270: 'qlty', 16536: 'qltv', 829: 'become', 21271: "d'economie", 21272: 'actuarial', 21273: 'underwent', 21274: "attorney's", 10115: 'mozambique', 13937: 'choosing', 10116: 
'flush', 21275: 'phibro', 5305: 'recognition', 9419: 'contested', 21277: 'optimisim', 21278: 'constitucion', 8375: 'shareholdings', 27599: 'satisified', 12171: 'brokering', 16537: 'rashadat', 691: 'pressure', 2374: 'guaranteed', 21279: 'iwai', 9420: 'lifestyle', 1262: 'debentures', 12172: 'prudente', 16538: 'materializing', 13939: 'swimming', 6712: 'letters', 11065: 'unfavourable', 16539: 'camden', 12173: "aga's", 16540: 'cultivated', 21280: 'unfavourably', 2690: 'terminated', 13941: 'reactivate', 21281: 'splintered', 5306: 'snyder', 7988: 'peters', 21282: 'frp', 16541: 'terminates', 11066: 'rosenmuller', 21283: 'herley', 6971: 'bagged', 21213: 'yarns', 21285: 'tossed', 7271: 'evident', 6189: 'placer', 21286: 'shrunk', 12174: 'excitement', 1951: 'placed', 908: 'problem', 21287: 'quarries', 12175: "governor's", 9290: 'viner', 21288: 'benelux', 21289: "o'malley", 8376: 'walters', 7615: 'effected', 214: 'compared', 6972: 'nonetheless', 12176: 'deadly', 8377: 'lately', 21290: 'saviour', 21291: 'stibnite', 21292: 'atorino', 1478: 'compares', 780: 'details', 13942: 'illusion', 8378: 'repeat', 5307: 'doubted', 21293: 'zhou', 6547: 'worker', 21294: 'lenzburg', 6713: 'outlets', 6190: 'repeal', 2593: 'exposure', 16542: 'searches', 16543: 'macluan', 21295: 'hepc', 21296: 'gardena', 16544: 'torrid', 2567: 'compete', 21297: 'searched', 16545: 'gardens', 10839: 'giorgio', 7616: 'magnetic', 4551: 'doubts', 21298: '0835', 16546: 'coopers', 9421: 'nursery', 12178: '0830', 12179: 'rekindle', 21299: '0838', 11067: 'worts', 16547: 'unanimity', 4552: 'spin', 11068: 'neighbours', 21300: 'orgnaization', 684: 'worth', 21301: 'alternating', 13944: 'stanely', 12180: 'perishable', 13945: 'aurora', 24105: 'durango', 21302: 'dlar', 21303: 'summarized', 21304: 'cmi', 16549: 'investigator', 5701: 'cme', 5702: 'cmb', 21305: 'samurai', 21306: 'notifying', 28941: "defense's", 2758: 'deadline', 12181: 'abolishing', 12182: 'cms', 13946: 'roldan', 16550: 'cmp', 6433: 'jamaica', 21308: 'campeche', 
21309: 'sunbelt', 20755: 'participators', 11069: 'rouen', 21310: 'inflating', 3807: 'machines', 21311: 'subcontracts', 7989: 'employ', 5308: 'princeville', 21312: 'filtration', 16552: 'rockies', 13947: "salomon's", 21313: 'serivces', 21314: 'bipolar', 3412: 'petro', 13948: 'understandng', 21315: 'equaly', 7893: 'equals', 13949: 'redeploy', 1750: 'protection', 5627: '588', 21316: 'ploughed', 21317: '7860', 21318: 'formatura', 21319: 'neogtiating', 3082: 'obtained', 11186: 'fourteen', 6714: 'clarify', 12183: 'livingwell', 13950: 'bwb', 2631: 'stressed', 3638: 'postponed', 10119: 'wyss', 7617: '077', 24557: 'inequitable', 21321: 'bro', 13951: 'wyse', 4015: 'bra', 21322: 'plow', 12184: 'reflate', 13952: "pasar's", 2056: 'coins', 21323: 'ploy', 21324: 'administering', 16555: 'elias', 5518: 'separated', 16556: 'hutmann', 11070: 'fesharaki', 21325: 'mathew', 21326: 'regrouping', 4356: "unit's", 16557: 'separates', 11187: 'yard', 21327: 'adamant', 6434: 'blocking', 2941: '1991', 1501: '1990', 21328: 'insterest', 2465: '1992', 5519: '1995', 5520: '1994', 24606: 'err', 24608: 'ers', 11071: '1999', 12185: '1998', 21329: 'tne', 21330: 'tnd', 8379: 'era', 14797: 'containment', 8842: 'sdrs', 16558: 'mager', 5770: 'cardenas', 1410: 'indicated', 16559: '2867', 16560: 'deadweight', 10120: 'repos', 21332: 'flung', 10121: 'impair', 4210: 'indicates', 16561: 'wholesaling', 1640: 'totaled', 21333: "basix's", 21334: "platinum's", 1238: 'recovery', 5309: 'carriers', 717: 'provide', 21335: 'rationed', 21336: 'nuts', 12186: 'recovers', 11072: 'hollings', 5703: 'resignation', 8843: "imatron's", 21337: '286p', 21338: 'ladder', 1138: 'hong', 2307: 'customs', 13955: 'leuzzi', 6191: 'arco', 21339: 'complacent', 21340: 'spectrayne', 7273: 'schneider', 6192: 'appreciate', 24109: 'frederic', 16562: "smelter's", 12187: 'davies', 8380: 'mps', 16563: 'popsicle', 21341: 'slimmed', 16564: 'westphalia', 16565: 'rockport', 11073: 'innovative', 11074: 'priming', 16566: 'christie', 119: 'production', 
16567: 'understated', 16568: "south's", 7274: 'valor', 16569: 'wyona', 4211: "purolator's", 16570: "holder's", 354: 'coffee', 5521: 'safe', 16572: 'hispanics', 13956: 'maracaibo', 8844: 'penetration', 13957: "mci's", 13958: 'saft', 21342: 'dissappointing', 13826: 'principals', 6193: 'reasonably', 623: 'l', 2278: 'reasonable', 11075: 'feeds', 21343: 'starft', 2057: 'dumping', 16574: "rha's", 21344: 'slew', 20760: 'inadvertently', 5522: 'danforth', 21346: 'diphenyl', 3351: 'daniel', 21347: 'ramapo', 21348: 'halefolu', 18094: 'forcible', 6715: 'barrier', 3033: 'disputes', 16575: 'volkskas', 13959: 'amadeus', 21349: 'petcord', 16576: 'detecting', 8381: 'bilion', 13960: 'certified', 16577: 'consents', 16578: 'transferability', 5310: 'renew', 21350: 'a340', 21351: 'render', 7275: 'railroads', 392: 'another', 1641: 'electronic', 16579: 'timothy', 16580: '2053', 21352: 'jackup', 21353: "sony's", 16581: '2058', 3484: 'takeovers', 1734: 'approximately', 21354: 'ziana', 1248: 'john', 21355: 'wastes', 4017: 'cereal', 21356: 'wasted', 16582: 'haubourdin', 9424: 'lucas', 13961: '205p', 9425: 'guild', 12188: 'policymaking', 21358: 'meteorology', 16583: 'equicor', 5704: 'historical', 2017: 'enhancement', 16584: 'sulbath', 21359: 'respecting', 21360: 'contigent', 9426: 'contents', 16585: 'praising', 16586: 'convenient', 16587: 'morristown', 21361: 'subjects', 4428: 'unsecured', 435: 'acquire', 21362: 'goldenball', 4956: 'bruce', 21363: 'indemnification', 16588: 'troughs', 21364: 'assesses', 507: 'germany', 14862: 'docemarte', 7990: 'assessed', 21366: "hunts'", 10122: 'grave', 21367: 'rosenkranz', 21368: 'bracken', 16589: 'swamp', 7619: 'pampa', 406: 'reserve', 10123: 'stephens', 21369: 'preproduction', 11076: "economists'", 4685: 'lewis', 21370: 'agcny', 21371: 'intergovernmental', 21372: 'eider', 7151: 'completes', 7276: 'marketings', 21373: 'eidem', 16591: 'degradation', 16592: 'downtime', 9427: 'policymakers', 21374: 'troutman', 16593: 'hallmarked', 2691: 'runs', 15022: 'mepc', 
21376: 'boulangerie', 6435: 'krupp', 13962: 'reintroducing', 10124: 'emi', 22164: 'djibouti', 7991: 'dupont', 21377: 'mnln', 13963: "children's", 22034: 'rune', 14285: 'emp', 1918: 'ems', 16596: 'emr', 21378: 'democratica', 21379: 'freshwater', 12190: "outokumpu's", 481: 'secretary', 7277: 'turmoil', 21380: 'electrostatic', 16597: 'doyon', 1188: 'discussions', 13965: 'optimum', 21381: 'frontline', 5126: 'techniques', 16598: 'charmglow', 13966: 'draws', 21382: 'pasted', 1595: 'away', 16599: 'bracing', 1079: 'cooperation', 3558: 'drawn', 20769: 'paluszek', 16600: 'misguided', 16601: 'shields', 16602: 'awal', 5705: 'assays', 16603: 'handful', 16604: 'cfas', 13967: 'travellers', 13968: 'multivest', 3902: 'climate', 16605: 'canuc', 4315: "countries'", 16606: 'snowfalls', 21384: 'inteview', 16607: 'lousiana', 16608: 'siaahan', 16609: 'zheng', 21375: 'wrested', 4834: 'tone', 21386: 'adminstrative', 832: 'tons', 21387: 'applicability', 8382: 'tony', 16610: "sainsbury's", 1208: 'telecommunications', 21389: 'tolerances', 13969: 'stockdraws', 25874: 'bni', 10125: 'pglo', 21392: '6689', 21393: "pont's", 5059: 'fluctuation', 21395: 'nowshahr', 21396: '6684', 13970: 'metalicos', 16611: 'lockup', 16612: 'superpowers', 3413: 'attacked', 11567: 'wrather', 12660: 'divisor', 1080: 'gnp', 9428: 'reciprocal', 6716: "emery's", 12191: 'gnb', 21398: 'seekiong', 6717: "sosnoff's", 21399: 'gng', 27624: 'hager', 26832: '35th', 21400: 'cylinder', 12192: 'cons', 6973: 'cont', 7322: 'uganda', 16615: 'vogel', 21401: 'pipework', 16616: 'separatist', 11077: 'tissue', 21402: 'bastia', 16617: 'succed', 7992: 'conn', 16618: 'onwed', 21403: '160s', 25138: 'prtax', 13971: 'wheel', 21404: 'hani', 10126: 'counterparty', 9429: 'hang', 8846: 'counterparts', 2144: 'hand', 5706: 'hans', 13972: 'vermont', 10127: 'whereby', 11078: '1600', 21405: 'microfiltration', 9430: 'client', 21406: 'mizel', 3880: 'easily', 21407: 'tabling', 7278: 'mclean', 8847: 'intercare', 6436: 'edmonton', 21408: 'thunholm', 7620: 
'thanks', 21409: 'euaring', 6974: 'citgo', 4018: 'jose', 7621: 'yamaha', 21410: 'oiwned', 2466: 'amoco', 5127: 'wright', 16619: "commonwealth's", 5691: 'istat', 13973: 'shoup', 21412: 'openings', 13974: 'siegfried', 21413: 'insensitive', 12193: 'fsis', 13975: 'lce', 25199: 'groep', 13976: 'designers', 8848: 'unfounded', 10128: 'eroded', 27629: 'autocratic', 1860: 'tendered', 12194: 'cooler', 1402: 'night', 12195: 'cooled', 13977: 'trained', 16621: 'curazao', 16622: 'born', 16623: 'bork', 13978: 'flatten', 1491: 'borg', 16624: 'bore', 21415: 'borc', 16625: 'confusing', 21416: 'juen', 16626: 'gasoil', 16627: 'haslam', 21417: 'maricorp', 16628: "'i'm", 2692: 'asking', 16629: 'cerro', 1843: 'participation', 21418: 'ratably', 11080: 'architectural', 12196: 'substitution', 16630: 'peer', 21419: 'herpes', 11081: 'gartner', 8383: "airline's", 16631: 'gallego', 3352: 'jacques', 16632: 'wasting', 7249: 'attorney', 10729: 'accruals', 16633: 'rendering', 21420: 'obligate', 13979: 'lavan', 21421: 'laval', 21422: 'ruvuma', 21423: 'wanamaker', 21424: 'peistner', 21425: 'envirodyne', 10129: 'refloat', 6195: 'sovereign', 2594: 'conservation', 12197: 'wrongdoing', 21426: 'threshhold', 5955: 'diligence', 11082: 'garages', 21427: 'needles', 13980: 'catalyst', 13981: "philips'", 21428: 'vlans', 21429: 'qassimi', 13982: 'isis', 21430: 'reproductive', 9431: 'ecgd', 1469: 'maximum', 21431: 'maximun', 1525: 'different', 16634: 'duggan', 21432: 'plymouth', 21433: 'quartger', 4429: 'unleaded', 16635: "marion's", 1317: 'request', 16636: "dixon's", 6437: 'expertise', 21434: 'prayer', 21435: 'crediting', 1735: 'test', 21436: 'pursestrings', 21437: 'bellevue', 16638: "o'clock", 16639: 'mulling', 9432: 'update', 13983: "let's", 7993: 'ammeen', 9433: 'interval', 21438: 'concorde', 11083: 'beds', 6196: 'concept', 67: '0', 13984: 'diplomacy', 16640: 'redeployment', 21439: 'cryssen', 8849: 'supplement', 21440: "citybank's", 2595: 'battle', 21441: 'tenable', 21442: 'layers', 16641: 'soothed', 3903: 
'heller', 7994: 'learjet', 7995: 'dissatisfied', 8850: 'amlc', 13985: 'pson', 21443: 'dartmouth', 16642: 'chainwide', 18375: "'triple", 3129: 'boosting', 12198: 'automobiles', 21445: 'kcsi', 18382: 'airsignal', 4957: 'andriessen', 21447: 'evaded', 21448: 'flammable', 8384: '12th', 13986: 'gun', 21449: 'gum', 12199: 'gus', 21450: 'sooon', 13987: 'gut', 12200: 'guy', 12201: 'alfa', 21451: 'verifiable', 8385: 'nortek', 16644: 'ukasta', 416: 'cost', 21452: 'alfs', 21453: 'forging', 16645: 'krpg', 7430: 'substitutes', 12202: 'kaines', 12203: 'idemitsu', 21455: 'prefential', 8851: 'smartmodem', 6438: 'petromin', 76: 'shares', 4019: 'shared', 6837: 'hajime', 16646: '43p', 13988: 'cranston', 24131: 'ning', 13989: 'giles', 16648: 'defict', 21456: 'teaches', 13990: 'teacher', 13991: 'mcfadden', 6439: 'sending', 16649: "metallgesellschaft's", 11084: 'burdensome', 12204: 'buchbinder', 4430: 'franklin', 5128: '438', 4958: '439', 18410: 'relected', 4559: '437', 5311: '434', 3904: '435', 3245: '432', 4212: '433', 2397: '430', 6197: '431', 5312: 'regardless', 25536: 'fbt', 6718: 'fbc', 16650: 'bjoern', 21457: "faith's", 16651: 'slackens', 18421: '32876', 8852: 'roastings', 21458: 'slough', 2364: 'rainfall', 16653: 'cereais', 21460: 'fereidun', 16654: 'prepay', 13992: 'ibj', 21461: 'ibl', 2365: 'ibm', 16655: 'ibn', 2596: 'ibc', 8386: "standard's", 21462: 'purge', 16656: 'ibp', 999: 'totalled', 21463: 'submachineguns', 3083: 'trans', 16657: 'systemone', 16658: 'infoguard', 2036: 'chip', 4835: "didn't", 16659: 'chit', 21464: 'chik', 21465: 'chin', 21466: 'nastro', 16660: 'dekalb', 13993: 'chia', 2766: 'occur', 16661: 'dialysis', 21467: 'statemnet', 2767: 'discussion', 7280: 'spreads', 4431: "state's", 11085: 'deteriorate', 539: 'product', 21468: "suppliers'", 21469: 'substructure', 11086: 'peerless', 21470: 'dampened', 1043: 'produce', 13994: 'shortcovering', 4213: 'drastic', 21471: 'noses', 21472: 'grandson', 7281: 'csra', 9434: 'irwin', 7623: 'corona', 14236: "delhi's", 16662: 
'kurz', 12205: 'kurt', 21474: 'unkonwn', 16663: 'chops', 6975: 'serving', 21475: "glaxo's", 270: 'ended', 21476: 'disparities', 322: 'still', 21477: 'equalling', 16665: '324p', 21478: 'saxon', 16666: 'factual', 21357: "pipelines'", 355: 'non', 13995: 'noh', 3559: 'introduce', 25700: 'noc', 21480: 'elecetric', 10130: 'overhanging', 11087: 'wealthy', 42: 'not', 3905: 'nov', 186: 'now', 2245: 'nor', 7431: 'undermining', 496: 'drop', 9715: 'magazines', 12206: 'unloaded', 6440: 'raises', 10526: 'monsod', 6198: 'challenged', 5707: 'challenges', 9716: 'soy', 13996: 'polices', 26: 'year', 12207: 'naming', 10131: 'monitors', 21483: 'lettershop', 3130: 'bullish', 5956: 'furthermore', 13997: 'shelling', 1522: 'hectares', 21484: "wtc's", 21485: '460p', 5523: 'fargo', 16669: '51p', 21486: 'sactions', 19880: 'foray', 8730: 'derived', 16671: 'divergence', 5130: 'diagnostic', 7624: 'advantages', 10132: "usx's", 16672: 'repayable', 16673: 'haefner', 16674: 'misima', 5957: 'transition', 8387: '519', 4432: '518', 21487: 'janata', 7282: 'invitation', 4686: '511', 3560: '510', 5958: '513', 5524: '512', 3808: '515', 5959: '514', 3809: '517', 4409: '516', 13998: 'blind', 6441: 'romania', 21488: 'rino', 16675: 'rinn', 16676: 'rini', 7625: 'ring', 21489: 'scandivanian', 851: 'tomorrow', 9435: 'grosso', 21490: 'hodes', 6719: 'caught', 21491: 'respectviely', 13999: 'reinvestment', 16677: 'reorganising', 21492: 'remotely', 21493: 'savona', 4687: 'hodel', 21494: 'pny', 14000: 'grossa', 25827: 'asymmetric', 15093: 'pnp', 12208: 'schlecht', 21496: 'pnh', 21497: 'acquainting', 16678: 'pnb', 2429: 'interbank', 6977: 'png', 12209: 'greenwood', 11088: 'professionals', 5817: 'underwritten', 5708: 'bauxite', 4316: 'transferred', 9612: 'frictions', 9191: 'overhang', 7283: 'erbynn', 16679: "amro's", 16680: 'rubbermaid', 24142: 'mediterranee', 16681: 'inpatient', 21500: 'electrinical', 21501: 'windsor', 14001: '0400', 12210: 'rtas', 25867: 'ginneries', 12211: "maekawa's", 761: 'equipment', 11090: 
'inkey', 16682: 'authorising', 5709: 'cordoba', 16683: 'attractiveness', 16684: 'importantly', 16685: 'numac', 922: 'america', 5334: 'imposition', 21503: 'wildwood', 21504: 'mrt', 1358: 'reform', 1566: 'steady', 21505: 'mrs', 5313: 'lusinchi', 21506: 'mrk', 13785: 'mits', 18683: 'levesque', 21507: 'antioquia', 4688: "venezuela's", 10134: 'teamed', 12212: "utilicorp's", 16686: 'snack', 21508: 'daas', 21509: 'traning', 21510: 'downturns', 18583: 'filipinos', 7996: 'ameri', 14003: 'enriched', 21512: 'daan', 2569: 'interstate', 12213: "consolidated's", 14004: "sudan's", 21513: 'arauca', 21514: 'unneccessarily', 21515: 'twinjet', 21516: "gf's", 12214: 'burst', 21517: 'hoel', 21518: 'anchored', 2943: 'actively', 21519: 'recalcitrant', 7997: 'sandoz', 21520: 'staffed', 9436: "champlin's", 11091: 'staffer', 24147: 'environmentally', 8853: 'gazette', 12983: 'unanimous', 16687: 'therapies', 12215: 'schuster', 21522: 'computors', 14005: 'nelissen', 12216: 'hurts', 21523: 'ccml', 16688: 'impediments', 21524: 'attache', 21525: 'hybrids', 21526: 'jeans', 21527: "wireless's", 26011: 'hvy', 10135: 'exploit', 2129: '152', 21529: 'underwaty', 5314: 'tropical', 14006: '1030', 16690: 'odessa', 21530: 'krone', 10136: 'discontinuing', 9437: 'aero', 21531: 'kplm', 17423: 'obscuring', 21532: 'masonry', 21533: "seedman's", 21534: 'chanthong', 21535: 'whittar', 1098: 'transactions', 21536: 'baoshan', 21537: 'bernbach', 13009: "goodyear's", 5825: 'metric', 26083: 'econonic', 9438: 'beaver', 4433: 'waterway', 1903: 'develop', 21539: 'czechoslovakian', 16692: 'permament', 4836: 'inquiry', 16079: 'zealander', 10138: 'frazier', 21540: 'unbudgeted', 21541: "agricole's", 6557: 'arctic', 10139: 'foam', 16693: 'teran', 12218: 'gaspar', 26122: '8085', 8388: 'westin', 21542: '8087', 21543: 'oien', 4020: 'congressmen', 7284: 'alleviate', 14008: 'taxi', 21544: 'ilaqua', 2467: 'livestock', 27653: 'protocomdevices', 16694: 'battleship', 21545: 'typhus', 4021: 'foot', 7847: 'relevant', 21546: 'greetings', 
16695: 'neoy', 4959: 'gaviria', 1870: 'renewed', 10140: "hutton's", 12220: 'mmal', 16696: 'versa', 14010: 'overbought', 21547: 'disproportionate', 6978: 'ami', 21548: 'propped', 14011: 'hemsley', 16697: "total's", 14012: 'laundering', 9439: 'amo', 10141: 'psc', 3131: 'amc', 7285: 'psa', 10142: 'ame', 23753: 'psg', 8854: 'psd', 10143: 'pse', 26183: 'amx', 16699: 'psy', 16700: 'unwrought', 7627: 'amp', 21549: 'ams', 7628: 'amr', 8389: 'amt', 16701: 'pst', 5525: 'fortune', 6442: 'heightened', 16702: 'unrequited', 10144: 'conducts', 2120: 'annually', 21550: 'reocrd', 14013: 'oeien', 14014: 'captial', 21551: "gcc's", 7286: 'twelve', 12221: 'verbal', 12927: 'greenery', 21552: 'tragedies', 7287: 'cathode', 21553: 'envoys', 21554: 'nucleic', 21555: 'rinderpest', 8390: 'netbacks', 14015: 'repubblica', 6199: 'kaiser', 21556: '1850', 4625: 'models', 9440: 'guillaume', 14016: 'kennecott', 21558: 'nuggets', 5960: 'assembly', 3084: 'sixth', 26260: 'assemble', 9107: 'exciting', 12223: 'veslefrikk', 21560: 'overrall', 21561: 'casuals', 21562: 'whbf', 21563: 'ideology', 14017: 'trailer', 21564: '7940', 18717: 'antibody', 9441: 'siddeley', 26276: 'mannheim', 1189: 'outlook', 3414: 'stabilisation', 8855: 'shrinking', 12224: 'mcgroarty', 12225: 'qualifying', 8856: 'hertz', 20828: 'suppositions', 621: 'start', 21566: "nordbanken's", 21567: 'politeness', 21568: 'afobaka', 6720: 'rjr', 21569: 'ealier', 16704: 'shipbreaking', 10146: 'smuggling', 1709: 'delayed', 22180: 'neccesarily', 16705: 'manipulative', 21570: 'recouped', 16370: 'pitched', 21572: 'becasue', 4960: 'fraud', 4560: 'default', 8391: 'kresge', 21573: 'besse', 16706: 'realigned', 21574: "as'", 4317: '605', 5961: '604', 4837: '607', 5131: '606', 5526: '601', 636: '600', 4022: '603', 3906: '602', 4961: '609', 4689: '608', 16707: 'leroux', 21575: 'moneyline', 21576: 'appraising', 12226: 'terminating', 4214: 'forcing', 1895: 'poor', 21577: '1350', 14018: 'ineligible', 21578: '60p', 4561: 'unocal', 21579: 'cazale', 21580: 
'wallace', 21581: 'endeavors', 14019: 'dubuque', 21583: 'montly', 4562: 'queensland', 7288: 'pool', 11093: 'hambrecht', 1771: 'bulk', 16708: 'corel', 5315: 'roasters', 21584: 'corea', 3907: 'stockpile', 21585: "ross's", 1070: 'overseas', 16709: 'monte', 14020: 'ceases', 136: 'month', 8178: 'ceased', 12228: 'religious', 21587: 'broadbased', 6979: 'conger', 14021: 'surpised', 9442: 'wmx', 2510: 'chain', 2693: 'pledged', 8857: 'wyoming', 21588: 'insulate', 12229: 'lammers', 22474: 'locomotives', 8858: 'wmc', 21591: 'wmb', 5710: 'pledges', 14022: 'resounding', 12230: 'comercial', 21592: 'augusto', 21593: 'horror', 16711: 'witching', 21594: "hollinger's", 2023: 'decide', 21595: 'anat', 5962: 'targetted', 24155: '237p', 15248: 'promac', 14023: 'kondo', 21599: "o'sullivan", 10147: 'reaffirming', 26456: '385p', 21600: 'reformate', 14024: 'brucellosis', 10148: "darman's", 21601: 'streets', 10149: 'expansive', 3769: 'darman', 7998: 'enjoin', 11094: 'hamper', 11095: 'dunn', 4838: 'investing', 16713: 'ast', 21602: 'microbiology', 14026: 'paemen', 16714: 'bernhard', 8859: 'learned', 13857: 'ldbrinkman', 21603: 'richarson', 11096: 'tracks', 26499: 'acadia', 19468: 'halfway', 1606: 'excess', 26504: "petranol's", 14027: 'arena', 12231: 'conviction', 8860: 'geodyne', 505: 'losses', 620: 'base', 4839: 'advertising', 16717: 'nesbitt', 16718: 'successors', 21605: 'wallingford', 3679: 'requiring', 10150: 'steinhaeuser', 2121: 'ask', 14028: '700p', 21606: 'warplnes', 5132: 'conventional', 21607: 'heartened', 1411: 'oecd', 16719: 'oecf', 5711: 'mcivor', 4963: 'brunswick', 3561: 'bankamerica', 10151: 'pazzionotto', 14029: 'hannes', 16720: 'indenture', 21608: 'lazaro', 16721: 'garza', 21609: 'dior', 21610: '7001', 21611: '7000', 14030: 'lazard', 811: 'producing', 21612: 'antigone', 21613: 'grill', 7999: 'consortia', 21614: 'budgets', 12232: 'byers', 4023: '740', 4690: '741', 5133: '742', 6200: '743', 5527: '744', 5134: '745', 6721: '746', 4103: '747', 7289: '748', 6980: '749', 14031: 
'phrase', 21615: 'publicise', 24161: 'cheering', 16722: 'mcdp', 21617: 'utilize', 26609: 'mongstad', 4318: 'reject', 17446: 'vigilance', 7371: '1970', 8861: 'schultz', 21619: 'koppabergs', 16723: 'communicating', 21620: 'winnebago', 9443: 'interfirst', 16724: 'solters', 21621: "jacobson's", 12233: 'compulsory', 21622: 'sesdaq', 21623: 'criticize', 21624: 'recnogized', 16937: 'mobilise', 10152: 'anytime', 22482: '401p', 21627: 'mangood', 23762: "spa's", 10153: 'thermal', 12234: "idc's", 21628: 'wishful', 21629: 'heron', 14032: 'bywater', 6512: 'territorial', 14033: 'mitek', 644: 'systems', 8393: 'differed', 16727: 'founders', 4691: "haven't", 21630: 'misalignment', 5963: 'evening', 16728: 'curtis', 16729: 'cea', 11097: 'pettee', 16730: 'ces', 26692: 'commodore', 16731: 'muskeg', 14035: 'mcadoo', 10154: 'burkhard', 11098: 'bryan', 16732: 'healthmate', 21631: "financiers'", 6722: 'liquefied', 21632: 'obligates', 21633: 'unmerciful', 16733: 'fairs', 14036: 'predators', 21634: 'radial', 11099: 'obligated', 21635: 'lifestyles', 9444: 'predatory', 425: 'barrels', 21636: 'exasperated', 21637: 'syacmore', 7290: 'punitive', 16734: 'jawa', 21638: 'anarchy', 21639: 'unsurpassed', 16735: 'testify', 16736: 'jolly', 3191: 'safety', 58: '7', 26732: 'jolla', 21641: "america'", 14037: 'schaik', 12235: "ariadne's", 21642: 'multnomah', 21643: 'freeboard', 11100: 'earns', 16737: 'housed', 6201: 'favored', 27678: 'foreland', 1671: 'houses', 21645: 'unsightly', 16738: 'brightest', 14038: 'concessionary', 21646: 'grovewood', 1205: 'speculation', 6202: 'unresolved', 14039: 'americas', 231: 'american', 11101: 'frenzel', 11102: 'yaik', 6723: 'barwon', 14040: 'morita', 16739: 'tf1', 10155: 'horse', 21649: 'dashwood', 11103: 'canbra', 2898: 'station', 9445: 'gibraltar', 6443: 'hundred', 5964: 'raws', 5135: 'trapped', 4692: 'ascs', 21651: '1912', 26799: 'asca', 16740: '1914', 8862: 'rawl', 1386: 'grew', 21653: 'gres', 21654: 'grex', 6981: 'grey', 21655: 'greb', 21656: "cooper's", 1682: 
'toward', 10156: 'procedural', 5528: '883', 6444: '882', 5529: '881', 4964: '880', 6203: '887', 6204: '886', 5712: '885', 4840: '884', 5530: '889', 5136: '888', 8394: 'deflationary', 16741: 'committment', 21657: 'organs', 21658: 'associatioon', 21659: 'cherokee', 21660: "protection's", 16742: 'aired', 5336: 'redeemable', 21662: "worker's", 8863: 'hired', 16743: 'channeling', 4031: 'comparisons', 6982: 'soriano', 21664: 'channelink', 4841: 'privatised', 16744: 'bzw', 9902: 'levied', 26874: 'moonshine', 21665: 'confinement', 16745: 'outpatients', 6724: 'divestiture', 14041: 'tubes', 11104: 'jeopardy', 21666: 'celebrated', 8395: 'phenomenon', 8000: 'hovered', 9951: 'taxable', 21667: 'broadacasting', 6445: "nations'", 21668: 'leandro', 12236: 'zakum', 21669: 'distressing', 2430: 'justice', 11105: 'quel', 21670: 'criticising', 3640: 'rotterdam', 7438: 'grocery', 4842: 'feels', 3034: 'competing', 10157: 'combinations', 16747: "'silent", 21671: 'stopwork', 21672: 'pneumatics', 6983: 'adhering', 1255: 'retaliation', 12237: 'alsthom', 5714: 'stored', 21673: 'depresed', 21674: 'stn', 12238: 'lornex', 21675: 'hplg', 14042: 'becher', 12239: 'culture', 21676: "linde's", 21677: 'scandinavian', 3908: 'locals', 8001: 'pictures', 14043: 'tolls', 11106: 'stagnation', 8396: 'missing', 10158: 'ranked', 16749: 'dowty', 14044: 'abruptly', 8865: 'league', 5316: "wouldn't", 3562: 'minorities', 10159: 'canadians', 27001: 'experimental', 21678: 'hairdressing', 21679: 'servotronics', 12240: "edelman's", 12241: 'jewelmasters', 7721: 'deciding', 14762: 'waterside', 8002: 'subsidised', 16751: 'oamcaf', 16752: 'subsidises', 21681: 'tailgates', 4434: 'threaten', 16753: 'instructing', 8866: 'empty', 21682: 'tentiative', 5531: 'lived', 21683: 'billoin', 21684: 'propositions', 21685: 'sievers', 21686: 'amendemnt', 21687: 'haecke', 8867: 'lives', 12242: 'mombasa', 610: 'pact', 21688: 'derferred', 12219: 'warexin', 2472: '285', 8868: 'wyatt', 2398: 'pace', 16755: 'kara', 23302: 'adh', 14045: 'guido', 
21690: 'rosecraft', 21691: 'paco', 9446: 'richter', 6446: 'guide', 9447: 'pack', 4693: 'costly', 21692: 'petal', 16756: 'kroger', 21693: 'dynavision', 12243: 'handelsblatt', 16757: 'payers', 11107: 'albany', 3132: 'grant', 21694: "cityquest's", 21695: 'albani', 4694: 'vulnerable', 4965: 'grand', 16758: 'pontins', 14046: 'composition', 7291: "turkey's", 9448: 'fatty', 5965: 'ratings', 12244: 'spedding', 7292: 'blair', 6984: '003', 5715: '002', 4843: '001', 15: '000', 6725: '007', 5966: '006', 5532: '005', 6726: '004', 6985: '009', 4966: '008', 6727: 'arturo', 21696: 'employement', 21697: 'poona', 4967: 'obviously', 21698: 'consistency', 21699: 'previoiusly', 2097: 'speculative', 16759: 'labatt', 21700: 'promedico', 8869: 'bearer', 14047: 'powerine', 5533: 'settlements', 4025: 'reviewed', 14048: '313p', 21701: 'mupawose', 21702: "michigan's", 21703: 'defintive', 12245: 'oporto', 2807: 'informal', 5317: 'nikko', 21704: 'robertshaw', 16760: "icco's", 16761: 'excavators', 4435: 'questioned', 21705: 'panamerica', 12246: 'rahim', 2468: 'showing', 14049: 'cgee', 5716: 'dubai', 21706: 'fssl', 2944: 'conable', 16762: 'dearly', 14050: 'cger', 21707: 'ponds', 21708: 'preopening', 1691: 'excluding', 21710: 'aabex', 14051: 'envisioned', 11108: 'stagnate', 2058: 'eep', 21711: 'buckhannon', 14052: 'eei', 4844: 'popular', 21712: 'softness', 16763: 'eec', 7629: 'lipc', 16764: 'caspian', 7977: 'petroles', 16765: 'conex', 165: 'economic', 14054: 'toufexis', 21714: 'impoverish', 27218: 'trotters', 14697: 'minebea', 11110: 'undergone', 21716: 'rht', 21717: 'viewpoints', 14056: 'negatives', 21718: 'postions', 16766: 'worgroce', 16767: 'bgl', 5318: 'papua', 21719: 'tananbaum', 10160: 'iomega', 8735: 'alpha', 21720: 'presupposes', 16768: 'purina', 12248: 'institut', 4436: 'bangladesh', 5534: 'placing', 21721: 'moseley', 5967: 'withholding', 21722: 'leduc', 21723: 'chimbote', 21724: 'v2500', 21725: 'edmin', 21726: 'mulberry', 4563: 'injection', 8292: 'seton', 16769: 'vacancies', 12249: 
'munim', 1020: 'similar', 14059: 'nza', 2694: 'ordered', 21727: 'interventions', 5717: 'reopening', 16770: 'janatha', 14060: 'recalculated', 21729: 'ancestors', 1387: 'amounts', 14061: 'dashed', 1844: 'fears', 16771: 'feedlots', 1562: 'application', 9449: 'arabica', 21730: 'mondanto', 16772: 'haft', 210: 'department', 2730: 'manhattan', 21731: 'arithmetic', 16773: '5165', 21732: 'wohl', 8397: 'assayed', 13526: 'laroche', 8398: "firms'", 7829: "singapore's", 21734: 'svm', 1006: 'e', 16774: 'orbis', 6986: 'resolving', 14062: 'orbit', 12251: 'utilised', 14063: 'svz', 16775: 'bribe', 21735: 'imprecise', 5319: 'denman', 8399: 'compact', 21736: "eskey's", 21737: 'lkb', 1618: 'nippon', 10161: 'concessional', 21738: 'previoius', 14064: 'squaring', 3133: 'friendly', 16776: 'fieldcrest', 10162: 'miniere', 16777: 'rebounding', 1299: 'acreage', 3726: 'telling', 10163: 'sugarbeet', 21739: 'mauritania', 11111: 'csbc', 1479: 'positions', 21740: 'compassionate', 1845: 'michael', 14065: 'watered', 6987: 'ryan', 21741: 'csbp', 14066: 'geogia', 4800: 'certs', 8870: 'enforce', 9450: 'lazier', 21743: '8231', 12252: 'facilitating', 21744: "soybean's", 19206: 'auxton', 27459: 'okesman', 4215: 'jump', 3874: 'multilateral', 16778: 'tuobin', 12253: 'harmonious', 10164: 'brunei', 3909: 'becor', 14068: 'upsetting', 21746: 'madelin', 21747: 'japsper', 21748: 'mccrae', 6988: "cyacq's", 3727: 'convert', 21749: 'walkman', 21750: 'lyell', 16779: "nino's", 16780: 'genm', 21751: 'convern', 9451: 'gene', 16781: 'genb', 6728: 'patents', 9452: 'biopharmaceutics', 5535: 'foresee', 3246: 'clark', 11113: "manhattan's", 4564: 'manage', 12254: 'clara', 21752: 'revelving', 8400: 'buildup', 21753: 'expoloration', 21754: 'kraftwerk', 21755: '595p', 10165: 'camera', 7293: 'concepcion', 13645: 'asahi', 16782: 'depreciate', 8402: 'surveyed', 21756: 'isao', 16783: 'isam', 3563: 'formally', 4732: 'pretty', 21758: 'isab', 16784: 'visibility', 16785: 'ecop', 6729: 'warplanes', 21759: 'rivalry', 3247: 'appointed', 
9453: 'keswick', 14069: 'stafford', 9454: 'lapse', 1628: 'averaged', 1710: 'recession', 20840: 'halldor', 16786: 'servants', 21760: 'reafforestation', 2788: '238', 21761: "dockers'", 8403: 'averages', 4105: 'withdrew', 6447: 'mees', 21762: 'ittihad', 4845: 'links', 22508: 'amtr', 1258: 'allowed', 11392: 'woodside', 16788: 'pulling', 1723: 'sought', 2123: 'dairy', 21764: 'incudes', 21765: 'defective', 2525: '230', 12256: 'narr', 21767: 'ordinaries', 21768: 'everly', 16789: 'sentiments', 21769: 'instinctively', 21770: 'filament', 14071: 'fare', 462: 'farm', 21771: 'faro', 21772: 'oilfileds', 21773: 'thunderstorm', 3415: 'ronald', 8404: "organization's", 21774: 'gebrueder', 14072: "fermenta's", 16790: 'indemnifying', 16791: 'vienot', 21775: 'surfrace', 11910: 'checkoff', 21776: 'alava', 345: 'including', 21777: 'acuqisition', 21778: "copy's", 9980: "adsteam's", 5137: 'temporao', 21779: "guaranty's", 21780: 'degadillo', 21781: 'articulation', 6205: 'uccel', 21782: 'eckhart', 16792: 'yugo', 16793: 'purusant', 12257: 'kilometres', 21783: 'overcharges', 3485: 'university', 11115: 'anzola', 2768: 'slide', 21784: 'firings', 10531: 'undertook', 3416: 'prevailing', 16795: 'shortfull', 21785: 'greyerz', 5968: 'constitute', 16796: 'investigative', 16797: 'separating', 16798:
# Join the looked-up words with single spaces so the first article reads like a sentence.
# NOTE(review): reuters.load_data() reserves 0/1/2 (padding/start/OOV) and shifts
# every word index by index_from=3 by default, while get_word_index() is not
# shifted. This direct lookup therefore likely decodes the wrong words; the
# faithful form would be word_of_news.get(key - 3, '?'). Confirm before relying
# on the decoded text.
print(' '.join([word_of_news[key] for key in X_train[0]]))
the wattie nondiscriminatory mln loss for plc said at only ended said commonwealth could 1 traders now april 0 a after said from 1985 and from foreign 000 april 0 prices its account year a but in this mln home an states earlier and rise and revs vs 000 its 16 vs 000 a but 3 psbr oils several and shareholders and dividend vs 000 its all 4 vs 000 1 mln agreed largely april 0 are 2 states will billion total and against 000 pct dlrs
# str.join example: the separator string (' ') is placed between the list items.
sample_words = ['오늘', '점심', '메뉴는', '무엇이 좋을까요?']
print(' '.join(sample_words))
오늘 점심 메뉴는 무엇이 좋을까요?
# Expanded for-loop equivalent of the list comprehension
#   ' '.join([word_of_news[key] for key in X_train[0]])
# Each encoded integer of the first article is looked up in word_of_news and
# the resulting words are joined with spaces.
# NOTE(review): reuters.load_data() shifts word indices by index_from=3 by
# default (0/1/2 are padding/start/OOV), so a faithful decode would look up
# `key - 3`; kept as a direct lookup here to mirror the comprehension -- confirm.
train_len = []
for key in X_train[0]:
    # The loop body must be indented; without it the loop is a syntax error.
    train_len.append(word_of_news[key])
print(' '.join(train_len))
the wattie nondiscriminatory mln loss for plc said at only ended said commonwealth could 1 traders now april 0 a after said from 1985 and from foreign 000 april 0 prices its account year a but in this mln home an states earlier and rise and revs vs 000 its 16 vs 000 a but 3 psbr oils several and shareholders and dividend vs 000 its all 4 vs 000 1 mln agreed largely april 0 are 2 states will billion total and against 000 pct dlrs
# Inspect the distinct topic labels in the training targets.
# np.unique returns the sorted unique values of y_train, one per news topic.
np.unique(y_train)
array([ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16,
17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33,
34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45])
- CNN도 이미지 데이터를 모두 같은 크기로 변경시켰듯이 RNN도 뉴스기사의 단어 개수를 같이 맞춰줘야함 (RNN 신경망의 timesteps는 일정해야 하기 때문에)
- 긴 기사는 잘라내고, 짧은 기사는 붙여 넣어줘야함(padding)
# Collect the word count of every training article into a list.
train_len = []
for news in X_train:
    # The loop body must be indented; without it the loop is a syntax error.
    train_len.append(len(news))
# The same computation written as a list comprehension (overwrites the list
# built by the loop above with identical contents).
train_len = [len(news) for news in X_train]
# Summary statistics of the article lengths, used to choose a padding length.
print('최대값 : ', max(train_len))
print('최소값 : ', min(train_len))
print('평균값 : ', np.mean(train_len))
print('중앙값 : ', np.median(train_len))
최대값 : 2376
최소값 : 13
평균값 : 145.5398574927633
중앙값 : 95.0
# Histogram of article lengths to see where the data is concentrated.
# x-axis: article length split into intervals; y-axis: number of articles.
# bins sets how many equal-width intervals the full length range is split into.
plt.hist(x=train_len, bins=20)
plt.ylabel('news_count')
plt.xlabel('news_len')
plt.show()
# Line chart of the word count of each article (x: article index, y: length)
# to get a rough picture of how lengths vary across the training set.
plt.figure(figsize=(15,5))
plt.plot(train_len)
plt.xlabel('news_index')
plt.ylabel('word_len')
# plt.show must be called; a bare `plt.show` only references the function
# and never renders the figure outside an inline notebook backend.
plt.show()

문제 데이터 가공
- 전체 단어 개수의 분포를 고려해 가장 많은 개수를 차지하는 120개로 맞춰주자 (단어를 120개씩 설정해서 120번씩 순환시켜주자)
- RNN 신경망은 같은 timesteps로 맞춰줘야하기 때문에 사전에 데이터의 길이를 같게 맞춰줘야함
- 즉, 뉴스 기사의 단어 길이를 120회로 순환 학습하여 어떤 주제의 기사인지를 분류하는 문제라고 볼 수 있음
# pad_sequences makes every sequence the same length (the RNN's timesteps).
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fix the sequence length of X_train and X_test to 120 words.
# padding='pre' / truncating='pre' are the library defaults, written out here:
# short articles get zeros added at the front, long articles lose their
# leading words.
X_train_pad = pad_sequences(X_train, maxlen=120, padding='pre', truncating='pre')
X_test_pad = pad_sequences(X_test, maxlen=120, padding='pre', truncating='pre')
# With front padding the zeros come first and the real tokens come last.
# -> If the zeros were placed at the end instead, the information from the
#    real words might not be carried well through the final steps.
# The leading zeros carry no word information; the actual content starts at
# the first non-zero entry (the value 1 in the first article).
X_train_pad[0]
array([ 0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 1, 27595, 28842,
8, 43, 10, 447, 5, 25, 207, 270, 5,
3095, 111, 16, 369, 186, 90, 67, 7, 89,
5, 19, 102, 6, 19, 124, 15, 90, 67,
84, 22, 482, 26, 7, 48, 4, 49, 8,
864, 39, 209, 154, 6, 151, 6, 83, 11,
15, 22, 155, 11, 15, 7, 48, 9, 4579,
1005, 504, 6, 258, 6, 272, 11, 15, 22,
134, 44, 11, 15, 16, 8, 197, 1245, 90,
67, 52, 29, 209, 30, 32, 132, 6, 109,
15, 17, 12], dtype=int32)
# Show the shapes of the padded train and test arrays
X_train_pad.shape, X_test_pad.shape
((8982, 120), (2246, 120))
- timesteps : hello 학습시에는 h,e,l,l 이라는 총 4개의 문자가 들어갔기 때문에 4였고, 현재는 각 뉴스기사의 단어 120개를 120번 순환시킬 것이기 때문에 120으로 설정
- features : hello 학습시에는 원핫인코딩을 통해서 문제 데이터의 컬럼이 9개였기 때문에 9로 설정했고, 로이터뉴스 문제 데이터는 단어마다 숫자 하나로만 표시(레이블 인코딩)되어 있기 때문에 features는 1이 됨
- '입력'은 문제데이터, '출력'은 정답데이터를 보고 생각하자!
# Add a trailing feature axis so every timestep carries one value:
# (samples, timesteps) -> (samples, timesteps, 1).
# np.expand_dims takes the sizes from the arrays themselves, so nothing breaks
# if the number of samples or the padded length changes.
X_train_pad_reshape = np.expand_dims(X_train_pad, axis=-1)
X_test_pad_reshape = np.expand_dims(X_test_pad, axis=-1)
X_train_pad_reshape.shape, X_test_pad_reshape.shape
((8982, 120, 1), (2246, 120, 1))
RNN 신경망 모델링
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Dense, SimpleRNN

# Baseline network: one SimpleRNN layer reading 120 timesteps with 1 feature
# each, followed by the output layer.
# Output layer: 46 units with softmax, because each article is classified
# into one of 46 topics (multi-class classification).
model = Sequential([
    SimpleRNN(500, input_shape=(120, 1)),
    Dense(46, activation='softmax'),
])

# The labels are integer class ids, hence the sparse categorical loss.
model.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='Adam',
    metrics=['acc'],
)

# Hold out 20% of the training data for validation while fitting.
h = model.fit(
    X_train_pad_reshape,
    y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=128,
)
# Accuracy per epoch for the SimpleRNN run: training in blue, validation in red.
plt.figure(figsize=(15, 5))
for metric_key, line_color in (('acc', 'blue'), ('val_acc', 'red')):
    plt.plot(h.history[metric_key], label=metric_key, c=line_color, marker='.')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.legend()
plt.show()
- SimpleRNN만 가지고는 수십수백개의 단어들로 이루어진 복잡한 데이터를 순서까지 고려해서 다 분류해내기 어려움
LSTM과 Word Embedding을 적용
- SimpleRNN과는 달리 이전의 중요 데이터들을 모두 기억하여 예측에 반영
- LSTM에 덧붙여 뉴스기사의 단어들을 다양하게 표현할 수 있도록 Embedding도 활용
from tensorflow.keras.layers import LSTM, Embedding

# The Embedding layer holds one vector per integer token id, so its table must
# be larger than the largest id that is fed in. The data was loaded without a
# num_words limit and contains ids far above 1000 (e.g. 27595, 28842), so the
# table size is derived from the padded arrays instead of being hard-coded.
vocab_size = int(max(X_train_pad.max(), X_test_pad.max())) + 1

model1 = Sequential()
# input_dim : vocabulary size (largest token id + 1) - NOT the article length
# output_dim : length of the dense vector learned for each word
model1.add(Embedding(input_dim=vocab_size, output_dim=100))
model1.add(LSTM(1000))
# Output layer: 46 topics, softmax for multi-class classification
model1.add(Dense(46, activation='softmax'))
model1.compile(
    loss='sparse_categorical_crossentropy',
    optimizer='Adam',
    metrics=['acc'],
)

# Embedding expects 2D integer input of shape (samples, timesteps); it adds the
# feature axis itself. Feed the padded arrays directly, not the reshaped
# (samples, timesteps, 1) versions used for the SimpleRNN model.
h1 = model1.fit(
    X_train_pad,
    y_train,
    validation_split=0.2,
    epochs=50,
    batch_size=128,
)

# Accuracy per epoch for the LSTM run: training in blue, validation in red.
plt.figure(figsize=(15, 5))
plt.plot(h1.history['acc'], label='acc', c='blue', marker='.')
plt.plot(h1.history['val_acc'], label='val_acc', c='red', marker='.')
plt.xlabel("epochs")
plt.ylabel("accuracy")
plt.legend()
plt.show()

# Evaluate on the test split using the same 2D input format as training.
model1.evaluate(X_test_pad, y_test)
71/71 [==============================] - 1s 20ms/step - loss: 1.3989 - acc: 0.6986
[1.3989092111587524, 0.6985752582550049]
'딥러닝' 카테고리의 다른 글
[딥러닝] OpenCV (0) | 2022.07.28 |
---|---|
[딥러닝] Simple RNN (0) | 2022.07.26 |
[딥러닝] 데이터 증강(ImageDataGenerator) (0) | 2022.07.26 |
[딥러닝] VGG16 모델 (0) | 2022.07.26 |
[딥러닝] CNN 모델 (0) | 2022.07.26 |