English: Comparison of male and female life expectancy at birth for countries and territories as defined in the 2011 CIA Factbook, with selected bubbles labelled. Hover over a bubble to highlight it and show its data. The green line corresponds to equal female and male life expectancy. The apparent 3D volumes of the bubbles are linearly proportional to their population, i.e. their radii are linearly proportional to the cube root of the population. Data is from https://www.cia.gov/library/publications/the-world-factbook/fields/2102.html and https://www.cia.gov/library/publications/the-world-factbook/fields/2119.html .
Python script to fetch data and update data table

import re, os, urllib2, time, datetime, collections

## Previous data table, one row per line, fields separated by '|'.
## Judging from how the rows are used further down, the fields are:
##   0: label hint (may be empty)   1: display name   2: overall   3: male   4: female
##   5: population                  6: two-letter region code
## All fields are kept as strings here.
## NOTE(review): the terminator of this literal was missing in this copy of the
## script and has been restored with splitlines(); confirm against the original.
## NOTE(review): every surviving row has a space in its name, so rows for
## single-word country names may have been lost from this copy -- TODO confirm.
data_oldss = [line.split('|') for line in '''\
-20|EUROPEAN UNION|80.2|77.4|83.2|515052778|-
-15|DR Congo|57.3|55.8|58.9|81331050|af
|South Africa|63.1|61.6|64.6|54300704|af
|Korea, South|82.4|79.3|85.8|50924172|ea
|Saudi Arabia|75.3|73.2|77.4|28160273|me
|Korea, North|70.4|66.6|74.5|25115311|ea
|Cote d'Ivoire|58.7|57.5|59.9|23740424|af
|Sri Lanka|76.8|73.3|80.4|22235000|as
|Burkina Faso|55.5|53.4|57.6|19512533|af
|Dominican Republic|78.1|75.9|80.5|10606865|la
|Hong Kong|82.9|80.3|85.8|7167403|ea
|Papua New Guinea|67.2|65|69.5|6791317|ea
|El Salvador|74.7|71.4|78.1|6156670|la
|Sierra Leone|58.2|55.6|60.9|6018888|af
|United Arab Emirates|77.5|74.8|80.2|5927482|me
|Central African Republic|52.3|51|53.7|5507257|af
|Costa Rica|78.6|75.9|81.4|4872543|la
|Congo, Republic of the|59.3|58.1|60.6|4852412|af
|New Zealand|81.2|79.1|83.3|4474549|oc
|Bosnia and Herzegovina|76.7|73.7|80|3861912|eu
|Puerto Rico|79.4|75.8|83.1|3578056|la
|West Bank|75|73|77.1|2697687|me
|Gambia, The|64.9|62.5|67.3|2009648|af
|Gaza Strip|73.9|72.3|75.7|1753327|me
|Trinidad and Tobago|72.9|69.9|75.9|1220479|la
|Equatorial Guinea|64.2|63.1|65.4|759451|af
|Solomon Islands|75.3|72.7|78.1|635027|oc
|Western Sahara|63|60.7|65.4|587020|af
|Cabo Verde|72.1|69.8|74.5|553432|af
|Bahamas, The|72.4|70|74.8|327316|la
|French Polynesia|77.2|74.9|79.6|285321|oc
|New Caledonia|77.7|73.7|81.9|275355|oc
|Sao Tome and Principe|64.9|63.6|66.3|197541|af
|Saint Lucia|77.8|75|80.7|164464|la
|Micronesia, Federated States of|72.9|70.8|75|104719|oc
|Virgin Islands|80|77|83.2|102951|la
|Saint Vincent and the Grenadines|75.3|73.3|77.4|102350|la
|Antigua and Barbuda|76.5|74.4|78.8|93581|la
|Isle of Man|81.2|79.5|83|88195|eu
|Marshall Islands|73.1|70.9|75.4|73376|oc
|Cayman Islands|81.2|78.5|84|57268|la
|American Samoa|75.4|72.4|78.5|54194|oc
|Northern Mariana Islands|78|75.3|80.8|53467|oc
|Saint Kitts and Nevis|75.7|73.3|78.2|52329|la
|Turks and Caicos Islands|79.8|77.1|82.7|51430|la
|Faroe Islands|80.4|77.8|83.1|50456|eu
|Sint Maarten|78.1|75.8|80.6|41486|la
|British Virgin Islands|78.6|77.2|80.1|34232|la
|San Marino|83.3|80.7|86.1|33285|eu
|Wallis and Futuna|79.7|76.7|82.8|15664|oc
|Cook Islands|75.8|73|78.8|9556|oc
|Saint Helena, Ascension, and Tristan da Cunha|79.5|76.6|82.6|7795|af
|Saint Pierre and Miquelon|80.5|78.2|83|5595|na
|Falkland Islands (Islas Malvinas)|77.9|75.6|79.6|2931|sa
|Norfolk Island|NA|NA|NA|2210|oc
|Christmas Island|NA|NA|NA|2205|oc
|Cocos (Keeling) Islands|NA|NA|NA|596|oc
|Pitcairn Islands|NA|NA|NA|54|oc
'''.splitlines()]

# do_refresh_cache = True

def read_url(url, headers={}, path_cache=None, is_verbose=True):
 """Return the content of `url`, fetching it only if it is not cached on disk.

 url        -- address to fetch.
 headers    -- extra HTTP request headers (only read, never modified here).
 path_cache -- file used as cache; by default a file named after the last
               component of `url` inside the directory '<this script>.cache'.
 is_verbose -- print a line saying whether the cache was written or read.

 The page is downloaded when the global `do_refresh_cache` exists and is true
 or when the cache file is missing; otherwise the cache file is read.
 Returns the content decoded as UTF-8, or the raw bytes if they are not valid
 UTF-8. On an HTTP error the error is printed and an EMPTY cache file is
 written, so later calls return empty content until the cache is refreshed.
 """
 if (path_cache is None):
  file_cache = os.path.basename(url)
  path_cache = os.path.join('%s.cache' % (os.path.splitext(__file__)[0]),
                            file_cache if (len(file_cache) > 0) else
                            '%s.htm' % (os.path.basename(url.rstrip('/'))))
 if (('do_refresh_cache' in globals() and do_refresh_cache) or
     (not os.path.isfile(path_cache))):
  request = urllib2.Request(url, headers=headers)
  try:
   html = urllib2.urlopen(request).read()
  except urllib2.HTTPError as e:
   html = b'' ## bytes, so that it can be written to the binary cache file
   print(e)
  try:            os.makedirs(os.path.dirname(path_cache))
  except OSError: pass ## directory already exists (or the path has no directory part)
  with open(path_cache, 'wb') as f_html: f_html.write(html)
  if (is_verbose): print('%s > %s' % (url, path_cache))
  time.sleep(1) ## avoid rate-limit-exceeded error
 else:
  ## Cache hit: without this branch `html` was never assigned on this path.
  ## Read in binary mode to mirror the 'wb' write above.
  with open(path_cache, 'rb') as f_html: html = f_html.read()
  if (is_verbose): print('< %s' % (path_cache))
 try:                       html = html.decode('utf-8')
 except UnicodeDecodeError: pass ## leave undecodable content as raw bytes
 return html
def fmt(string): ## string.format(**vars()) using tags {expression!format} by CMG Lee
 """Expand {expression} and {expression!format} tags inside `string`.

 Each tag is evaluated as a Python expression. Without '!format' the result
 is converted with str() and a trailing '.0', '.00', ... is removed; with
 '!format' the result is rendered through '{:format}'.format(...).
 Doubled braces '{{' and '}}' produce literal '{' and '}'.

 The last '!' in a tag is always taken as the format separator, so an
 expression containing '!' (e.g. 'a != b') cannot be used without a format.

 SECURITY: tags are passed to eval(); only use this on trusted strings.
 """
 def f(tag):
  i_sep = tag.rfind('!')
  if (i_sep < 0):
   ## raw string: '\.' in a normal string literal is an invalid escape sequence
   return re.sub(r'\.0+$', '', str(eval(tag[1:-1])))
  return ('{:%s}' % tag[i_sep + 1:-1]).format(eval(tag[1:i_sep]))
 ## a tag is '{...}' with no braces inside and not preceded by another '{'
 return (re.sub(r'(?<!{){[^{}]+}', lambda m:f(m.group()), string)
         .replace('{{', '{').replace('}}', '}'))
def append(obj, string):
 """Expand `string` with fmt() and append the result to `obj`.

 Returns whatever obj.append() returns.
 """
 formatted = fmt(string)
 return obj.append(formatted)
def format_tab(*arg):
 """Join values into one tab-separated string.

 With two or more arguments, the arguments are the cells; with exactly one
 argument, that argument is iterated to obtain the cells.
 Every cell is converted with str().
 """
 cells = arg if (len(arg) > 1) else arg[0]
 return '\t'.join(map(str, cells))
def tabbify(cellss, separator='|'):
 """Render rows of cells as right-aligned text columns.

 cellss    -- sequence of rows; short rows are padded with '' on the right.
 separator -- text placed between columns (it becomes part of a %-format
              template, so it must not contain '%').
 Returns one line per row, joined with newlines; '' for no rows.
 """
 n_cols   = max([len(row) for row in cellss] or [0])
 padded   = [list(row) + [''] * (n_cols - len(row)) for row in cellss]
 widths   = [max([len(str(cell)) for cell in column]) for column in zip(*padded)]
 template = separator.join(['%%%ds' % width for width in widths])
 lines    = []
 for row in padded:
  lines.append(template % tuple(row))
 return '\n'.join(lines)
def hex_rgb(colour): ## convert [#]RGB to #RRGGBB and [#]RRGGBB to #RRGGBB
 """Return `colour` in '#RRGGBB' form.

 Strings of at most 4 characters have every character doubled (short form);
 longer strings are used as they are. Leading '#' characters are then
 stripped and a single '#' is prefixed.
 """
 if (len(colour) <= 4):
  colour = ''.join([digit + digit for digit in colour])
 return '#' + colour.lstrip('#')
def try_int_float(field):
 """Convert `field` to a number when possible.

 Returns int(field) if that works, otherwise float(field) if that works,
 otherwise `field` itself, unchanged.
 """
 try:
  return int(field)
 except (TypeError, ValueError, OverflowError):
  pass ## not an integer: fall through and try float
 try:
  return float(field)
 except (TypeError, ValueError):
  return field
def roundm(x, multiple=1):
 """Round `x` to the nearest multiple of `multiple`, halves rounded up.

 x        -- a number (or numeric string), or an indexable sequence of such
             values, possibly nested.
 multiple -- rounding step.
 Returns int(floor(x / multiple + 0.5)) * multiple for a scalar, and a list
 of rounded elements for a sequence (an empty sequence gives []).
 """
 import math ## local import: math is not among this file's top-level imports
 if (not isinstance(x, (str, bytes))): ## strings are indexable but are scalars here
  try:
   x[0] ## checks if x is indexable
  except IndexError:
   return [] ## empty sequence
  except (TypeError, KeyError):
   pass ## not a sequence: handled as a scalar below
  else:
   ## outside the try, so errors from the elements are not swallowed
   return [roundm(element, multiple) for element in x]
 return int(math.floor(float(x) / multiple + 0.5)) * multiple

def findall(regex, string):
 """re.findall() with case-insensitive matching and '.' matching newlines."""
 pattern = re.compile(regex, re.IGNORECASE | re.DOTALL)
 return pattern.findall(string)
def sub(regex_replace, regex_with, string):
 """Replace every match of `regex_replace` in `string` by `regex_with`.

 '.' also matches newlines. The result has surrounding whitespace stripped
 and is converted with str().
 """
 pattern  = re.compile(regex_replace, re.DOTALL)
 replaced = pattern.sub(regex_with, string)
 return str(replaced.strip())

def make_serial(name):
 """Return `name` lower-cased with everything except the letters a-z removed.

 The result is used as a lookup key for a country name.
 """
 lowered = name.lower()
 return sub(r'[^a-z]', '', lowered)
def make_table(datass):
 """Serialise rows as text: cells joined by '|', rows joined by newlines.

 Every cell is converted with str().
 """
 lines = []
 for datas in datass:
  lines.append('|'.join(map(str, datas)))
 return '\n'.join(lines)

data_newss = {} ## serial (see make_serial) -> dict of the fields scraped from the pages

## Scrape the life-expectancy listing: the page is read as pairs of adjacent
## <td> cells, the first holding the country name and the second holding <div>s
## from which the overall, male and female figures are taken, in that order.
## NOTE(review): the `try:` guarding the three conversions was missing in this
## copy (only its `except` survived) and has been restored.
html_expectancy = read_url('http://cia.gov/library/publications/resources/the-world-factbook/fields/355.html')
html_expectancyss = findall(r'(<td.+?</td>)\s*(<td.+?</td>)', html_expectancy)
regex_years = r'(?:[\d.]+(?= years)|\d+\.\d+)' ## a number followed by ' years', else any decimal number
for html_expectancys in html_expectancyss:
 html_divs = findall(r'<div.+?</div>', html_expectancys[1])
 name      = sub(r'<.*?>', '', html_expectancys[0])
 serial    = make_serial(name)
 try:
  expectancy_overall = float(findall(regex_years, html_divs[0])[0])
  expectancy_male    = float(findall(regex_years, html_divs[1])[0])
  expectancy_female  = float(findall(regex_years, html_divs[2])[0])
 except (IndexError, ValueError):
  continue ## a figure is missing or not a number: skip this entry entirely
 if (not serial in data_newss): data_newss[serial] = {}
 data_newss[serial]['overall'] = expectancy_overall
 data_newss[serial]['male'   ] = expectancy_male
 data_newss[serial]['female' ] = expectancy_female

## Scrape the population listing: pairs of adjacent <td> cells, the first
## holding the country name and the second the population text.
## NOTE(review): the `try:` around the parsing was missing in this copy (only
## its `except` survived) and has been restored.
html_population = read_url('http://cia.gov/library/publications/resources/the-world-factbook/fields/335.html')
html_populationss = findall(r'(<td.+?</td>)\s*(<td.+?</td>)', html_population)
for html_populations in html_populationss:
 name   = sub(r'<.*?>', '', html_populations[0])
 serial = make_serial(name)
 if ('no indigenous' in html_populations[1]): continue ## no population figure to read
 try:
  ## first run of digits and commas in the cell, with the commas removed
  population = int(sub(',','',findall(r'[\d,]+', html_populations[1])[0]))
 except (IndexError, ValueError):
  continue ## no usable number: skip this entry
 if (not serial in data_newss): data_newss[serial] = {}
 data_newss[serial]['name']       = name
 data_newss[serial]['population'] = population

## Debug listing of every scraped entry that has all six fields; entries with
## a missing field are left out. Uncomment the print to inspect it.
outss = []
for serial in sorted(data_newss):
 data_news = data_newss[serial]
 try:
  row = [serial] + [data_news[key] for key in
                    ('name', 'population', 'overall', 'male', 'female')]
  outss.append(row)
 except Exception: pass
# print(tabbify(outss))

## Build the updated table: one row per row of the old table, with the figures
## replaced by the freshly scraped ones ('NA' where a figure is unavailable).
outss = []
# print(tabbify(data_oldss))
## old serial -> serial used on the scraped pages; the old display name is kept
map_keeps   = {'usa':'unitedstates', 'uk':'unitedkingdom', 'drcongo':'congodemocraticrepublicofthe'}
## old serial -> new serial; the scraped (new) name replaces the old one
map_changes = {'swaziland':'eswatini'}

for data_olds in data_oldss:
 name      = data_olds[1]
 serial    = make_serial(name)
 data_news = {} ## stays empty if the country is not found, giving 'NA' figures
 ## NOTE(review): the `try:` and `else:` lines below were missing in this copy
 ## and are reconstructed from the surviving indentation and `except`.
 try:
  if (serial in map_keeps): serial = map_keeps[serial]
  if (serial in map_changes):
   serial    = map_changes[serial]
   data_news = data_newss[serial]
   name      = data_news['name']
  else:
   data_news = data_newss[serial]
 except KeyError: pass
 ## NOTE(review): the first, second and last elements of this row were missing
 ## in this copy. They are reconstructed so that the row mirrors the old table
 ## layout and so that population is column 5, which the sort below relies on
 ## -- TODO confirm against the original script.
 outss.append([data_olds[0],
               name,
               # data_news['name'      ] if ('name'       in data_news) else 'NA',
               data_news['overall'   ] if ('overall'    in data_news) else 'NA',
               data_news['male'      ] if ('male'       in data_news) else 'NA',
               data_news['female'    ] if ('female'     in data_news) else 'NA',
               data_news['population'] if ('population' in data_news) else 'NA',
               data_olds[6]])
 # outss.append(data_olds)
 ## report rows whose displayed name differs from the scraped one
 if (('name' in data_news) and (name != data_news['name'])): print(name, data_news['name'])
# print(tabbify(outss))
## the first two rows stay in place; the others are sorted by column 5, descending
outss = outss[:2] + sorted(outss[2:], key=lambda lines:lines[5], reverse=True)

## Write the old and the updated table next to the cached pages, in the
## directory '<this script>.cache'.
dir_cache = '%s.cache' % (os.path.splitext(__file__)[0])
for file_table, tabless in (('old.txt', data_oldss), ('new.txt', outss)):
 with open(os.path.join(dir_cache, file_table), 'w') as f:
  f.write(make_table(tabless))


表示 継承
このファイルはクリエイティブ・コモンズ 表示-継承 3.0 非移植ライセンスのもとに利用を許諾されています。
  • 共有 – 本作品を複製、頒布、展示、実演できます。
  • 再構成 – 二次的著作物を作成できます。
  • 表示 – あなたは適切なクレジットを表示し、ライセンスへのリンクを提供し、変更があったらその旨を示さなければなりません。これらは合理的であればどのような方法で行っても構いませんが、許諾者があなたやあなたの利用行為を支持していると示唆するような方法は除きます。
  • 継承 – もしあなたがこの作品をリミックスしたり、改変したり、加工した場合には、あなたはあなたの貢献部分を元の作品とこれと同一または互換性があるライセンスの下に頒布しなければなりません。
GNU head この文書は、フリーソフトウェア財団発行のGNUフリー文書利用許諾書 (GNU Free Documentation License) 1.2またはそれ以降のバージョンの規約に基づき、複製や再配布、改変が許可されます。不可変更部分、表紙、背表紙はありません。このライセンスの複製は、GNUフリー文書利用許諾書という章に含まれています。






